Re: Linux 6.6.28

From: Greg Kroah-Hartman
Date: Wed Apr 17 2024 - 05:38:14 EST


diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst
index 9edb2860a3e1..e0a1be97fa75 100644
--- a/Documentation/admin-guide/hw-vuln/spectre.rst
+++ b/Documentation/admin-guide/hw-vuln/spectre.rst
@@ -439,12 +439,12 @@ The possible values in this file are:
- System is protected by retpoline
* - BHI: BHI_DIS_S
- System is protected by BHI_DIS_S
- * - BHI: SW loop; KVM SW loop
+ * - BHI: SW loop, KVM SW loop
- System is protected by software clearing sequence
- * - BHI: Syscall hardening
- - Syscalls are hardened against BHI
- * - BHI: Syscall hardening; KVM: SW loop
- - System is protected from userspace attacks by syscall hardening; KVM is protected by software clearing sequence
+ * - BHI: Vulnerable
+ - System is vulnerable to BHI
+ * - BHI: Vulnerable, KVM: SW loop
+ - System is vulnerable; KVM is protected by software clearing sequence

Full mitigation might require a microcode update from the CPU
vendor. When the necessary microcode is not available, the kernel will
@@ -661,18 +661,14 @@ kernel command line.
spectre_bhi=

[X86] Control mitigation of Branch History Injection
- (BHI) vulnerability. Syscalls are hardened against BHI
- regardless of this setting. This setting affects the deployment
+ (BHI) vulnerability. This setting affects the deployment
of the HW BHI control and the SW BHB clearing sequence.

on
- unconditionally enable.
+ (default) Enable the HW or SW mitigation as
+ needed.
off
- unconditionally disable.
- auto
- enable if hardware mitigation
- control(BHI_DIS_S) is available, otherwise
- enable alternate mitigation in KVM.
+ Disable the mitigation.

For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 61199466c043..4cd15aee16c2 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3343,6 +3343,7 @@
reg_file_data_sampling=off [X86]
retbleed=off [X86]
spec_store_bypass_disable=off [X86,PPC]
+ spectre_bhi=off [X86]
spectre_v2_user=off [X86]
srbds=off [X86,INTEL]
ssbd=force-off [ARM64]
@@ -5921,16 +5922,13 @@
See Documentation/admin-guide/laptops/sonypi.rst

spectre_bhi= [X86] Control mitigation of Branch History Injection
- (BHI) vulnerability. Syscalls are hardened against BHI
- reglardless of this setting. This setting affects the
+ (BHI) vulnerability. This setting affects the
deployment of the HW BHI control and the SW BHB
clearing sequence.

- on - unconditionally enable.
- off - unconditionally disable.
- auto - (default) enable hardware mitigation
- (BHI_DIS_S) if available, otherwise enable
- alternate mitigation in KVM.
+ on - (default) Enable the HW or SW mitigation
+ as needed.
+ off - Disable the mitigation.

spectre_v2= [X86] Control mitigation of Spectre variant 2
(indirect branch speculation) vulnerability.
diff --git a/Documentation/networking/device_drivers/ethernet/amazon/ena.rst b/Documentation/networking/device_drivers/ethernet/amazon/ena.rst
index 5eaa3ab6c73e..b842bcb14255 100644
--- a/Documentation/networking/device_drivers/ethernet/amazon/ena.rst
+++ b/Documentation/networking/device_drivers/ethernet/amazon/ena.rst
@@ -54,6 +54,7 @@ ena_common_defs.h Common definitions for ena_com layer.
ena_regs_defs.h Definition of ENA PCI memory-mapped (MMIO) registers.
ena_netdev.[ch] Main Linux kernel driver.
ena_ethtool.c ethtool callbacks.
+ena_xdp.[ch] XDP files
ena_pci_id_tbl.h Supported device IDs.
================= ======================================================

diff --git a/Makefile b/Makefile
index c3077c5d5766..a3839877aafd 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 6
PATCHLEVEL = 6
-SUBLEVEL = 27
+SUBLEVEL = 28
EXTRAVERSION =
NAME = Hurr durr I'ma ninja sloth

diff --git a/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts b/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts
index ba7231b364bb..7bab113ca6da 100644
--- a/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts
+++ b/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts
@@ -210,6 +210,7 @@ ov2680_to_mipi: endpoint {
remote-endpoint = <&mipi_from_sensor>;
clock-lanes = <0>;
data-lanes = <1>;
+ link-frequencies = /bits/ 64 <330000000>;
};
};
};
diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c
index 8e3b5068d4ab..b45a3879eb34 100644
--- a/arch/arm/mach-omap2/board-n8x0.c
+++ b/arch/arm/mach-omap2/board-n8x0.c
@@ -79,10 +79,8 @@ static struct musb_hdrc_platform_data tusb_data = {
static struct gpiod_lookup_table tusb_gpio_table = {
.dev_id = "musb-tusb",
.table = {
- GPIO_LOOKUP("gpio-0-15", 0, "enable",
- GPIO_ACTIVE_HIGH),
- GPIO_LOOKUP("gpio-48-63", 10, "int",
- GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio-0-31", 0, "enable", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio-32-63", 26, "int", GPIO_ACTIVE_HIGH),
{ }
},
};
@@ -140,12 +138,11 @@ static int slot1_cover_open;
static int slot2_cover_open;
static struct device *mmc_device;

-static struct gpiod_lookup_table nokia8xx_mmc_gpio_table = {
+static struct gpiod_lookup_table nokia800_mmc_gpio_table = {
.dev_id = "mmci-omap.0",
.table = {
/* Slot switch, GPIO 96 */
- GPIO_LOOKUP("gpio-80-111", 16,
- "switch", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio-96-127", 0, "switch", GPIO_ACTIVE_HIGH),
{ }
},
};
@@ -153,12 +150,12 @@ static struct gpiod_lookup_table nokia8xx_mmc_gpio_table = {
static struct gpiod_lookup_table nokia810_mmc_gpio_table = {
.dev_id = "mmci-omap.0",
.table = {
+ /* Slot switch, GPIO 96 */
+ GPIO_LOOKUP("gpio-96-127", 0, "switch", GPIO_ACTIVE_HIGH),
/* Slot index 1, VSD power, GPIO 23 */
- GPIO_LOOKUP_IDX("gpio-16-31", 7,
- "vsd", 1, GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP_IDX("gpio-0-31", 23, "vsd", 1, GPIO_ACTIVE_HIGH),
/* Slot index 1, VIO power, GPIO 9 */
- GPIO_LOOKUP_IDX("gpio-0-15", 9,
- "vio", 1, GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP_IDX("gpio-0-31", 9, "vio", 1, GPIO_ACTIVE_HIGH),
{ }
},
};
@@ -415,8 +412,6 @@ static struct omap_mmc_platform_data *mmc_data[OMAP24XX_NR_MMC];

static void __init n8x0_mmc_init(void)
{
- gpiod_add_lookup_table(&nokia8xx_mmc_gpio_table);
-
if (board_is_n810()) {
mmc1_data.slots[0].name = "external";

@@ -429,6 +424,8 @@ static void __init n8x0_mmc_init(void)
mmc1_data.slots[1].name = "internal";
mmc1_data.slots[1].ban_openended = 1;
gpiod_add_lookup_table(&nokia810_mmc_gpio_table);
+ } else {
+ gpiod_add_lookup_table(&nokia800_mmc_gpio_table);
}

mmc1_data.nr_slots = 2;
diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi
index fc1a5d34382b..49298cd9eb0d 100644
--- a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi
@@ -41,7 +41,7 @@ usbotg1: usb@5b0d0000 {
interrupts = <GIC_SPI 267 IRQ_TYPE_LEVEL_HIGH>;
fsl,usbphy = <&usbphy1>;
fsl,usbmisc = <&usbmisc1 0>;
- clocks = <&usb2_lpcg 0>;
+ clocks = <&usb2_lpcg IMX_LPCG_CLK_6>;
ahb-burst-config = <0x0>;
tx-burst-size-dword = <0x10>;
rx-burst-size-dword = <0x10>;
@@ -58,7 +58,7 @@ usbmisc1: usbmisc@5b0d0200 {
usbphy1: usbphy@5b100000 {
compatible = "fsl,imx7ulp-usbphy";
reg = <0x5b100000 0x1000>;
- clocks = <&usb2_lpcg 1>;
+ clocks = <&usb2_lpcg IMX_LPCG_CLK_7>;
power-domains = <&pd IMX_SC_R_USB_0_PHY>;
status = "disabled";
};
@@ -67,8 +67,8 @@ usdhc1: mmc@5b010000 {
interrupts = <GIC_SPI 232 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x5b010000 0x10000>;
clocks = <&sdhc0_lpcg IMX_LPCG_CLK_4>,
- <&sdhc0_lpcg IMX_LPCG_CLK_0>,
- <&sdhc0_lpcg IMX_LPCG_CLK_5>;
+ <&sdhc0_lpcg IMX_LPCG_CLK_5>,
+ <&sdhc0_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "ahb", "per";
power-domains = <&pd IMX_SC_R_SDHC_0>;
status = "disabled";
@@ -78,8 +78,8 @@ usdhc2: mmc@5b020000 {
interrupts = <GIC_SPI 233 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x5b020000 0x10000>;
clocks = <&sdhc1_lpcg IMX_LPCG_CLK_4>,
- <&sdhc1_lpcg IMX_LPCG_CLK_0>,
- <&sdhc1_lpcg IMX_LPCG_CLK_5>;
+ <&sdhc1_lpcg IMX_LPCG_CLK_5>,
+ <&sdhc1_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "ahb", "per";
power-domains = <&pd IMX_SC_R_SDHC_1>;
fsl,tuning-start-tap = <20>;
@@ -91,8 +91,8 @@ usdhc3: mmc@5b030000 {
interrupts = <GIC_SPI 234 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x5b030000 0x10000>;
clocks = <&sdhc2_lpcg IMX_LPCG_CLK_4>,
- <&sdhc2_lpcg IMX_LPCG_CLK_0>,
- <&sdhc2_lpcg IMX_LPCG_CLK_5>;
+ <&sdhc2_lpcg IMX_LPCG_CLK_5>,
+ <&sdhc2_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "ahb", "per";
power-domains = <&pd IMX_SC_R_SDHC_2>;
status = "disabled";
diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
index adb98a72bdfd..89857e14c461 100644
--- a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
@@ -27,8 +27,8 @@ lpspi0: spi@5a000000 {
#size-cells = <0>;
interrupts = <GIC_SPI 336 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&spi0_lpcg 0>,
- <&spi0_lpcg 1>;
+ clocks = <&spi0_lpcg IMX_LPCG_CLK_0>,
+ <&spi0_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_SPI_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <60000000>;
@@ -43,8 +43,8 @@ lpspi1: spi@5a010000 {
#size-cells = <0>;
interrupts = <GIC_SPI 337 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&spi1_lpcg 0>,
- <&spi1_lpcg 1>;
+ clocks = <&spi1_lpcg IMX_LPCG_CLK_0>,
+ <&spi1_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_SPI_1 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <60000000>;
@@ -59,8 +59,8 @@ lpspi2: spi@5a020000 {
#size-cells = <0>;
interrupts = <GIC_SPI 338 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&spi2_lpcg 0>,
- <&spi2_lpcg 1>;
+ clocks = <&spi2_lpcg IMX_LPCG_CLK_0>,
+ <&spi2_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_SPI_2 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <60000000>;
@@ -75,8 +75,8 @@ lpspi3: spi@5a030000 {
#size-cells = <0>;
interrupts = <GIC_SPI 339 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&spi3_lpcg 0>,
- <&spi3_lpcg 1>;
+ clocks = <&spi3_lpcg IMX_LPCG_CLK_0>,
+ <&spi3_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_SPI_3 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <60000000>;
@@ -282,8 +282,8 @@ adc0: adc@5a880000 {
reg = <0x5a880000 0x10000>;
interrupts = <GIC_SPI 240 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&adc0_lpcg 0>,
- <&adc0_lpcg 1>;
+ clocks = <&adc0_lpcg IMX_LPCG_CLK_0>,
+ <&adc0_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_ADC_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
@@ -297,8 +297,8 @@ adc1: adc@5a890000 {
reg = <0x5a890000 0x10000>;
interrupts = <GIC_SPI 241 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&adc1_lpcg 0>,
- <&adc1_lpcg 1>;
+ clocks = <&adc1_lpcg IMX_LPCG_CLK_0>,
+ <&adc1_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_ADC_1 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
@@ -311,8 +311,8 @@ flexcan1: can@5a8d0000 {
reg = <0x5a8d0000 0x10000>;
interrupts = <GIC_SPI 235 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&can0_lpcg 1>,
- <&can0_lpcg 0>;
+ clocks = <&can0_lpcg IMX_LPCG_CLK_4>,
+ <&can0_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "per";
assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <40000000>;
@@ -332,8 +332,8 @@ flexcan2: can@5a8e0000 {
* CAN1 shares CAN0's clock and to enable CAN0's clock it
* has to be powered on.
*/
- clocks = <&can0_lpcg 1>,
- <&can0_lpcg 0>;
+ clocks = <&can0_lpcg IMX_LPCG_CLK_4>,
+ <&can0_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "per";
assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <40000000>;
@@ -353,8 +353,8 @@ flexcan3: can@5a8f0000 {
* CAN2 shares CAN0's clock and to enable CAN0's clock it
* has to be powered on.
*/
- clocks = <&can0_lpcg 1>,
- <&can0_lpcg 0>;
+ clocks = <&can0_lpcg IMX_LPCG_CLK_4>,
+ <&can0_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "per";
assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <40000000>;
diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi
index 133f2b1ce1d2..c66449798efc 100644
--- a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi
@@ -32,8 +32,8 @@ lsio_pwm0: pwm@5d000000 {
compatible = "fsl,imx27-pwm";
reg = <0x5d000000 0x10000>;
clock-names = "ipg", "per";
- clocks = <&pwm0_lpcg 4>,
- <&pwm0_lpcg 1>;
+ clocks = <&pwm0_lpcg IMX_LPCG_CLK_6>,
+ <&pwm0_lpcg IMX_LPCG_CLK_1>;
assigned-clocks = <&clk IMX_SC_R_PWM_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
#pwm-cells = <3>;
@@ -45,8 +45,8 @@ lsio_pwm1: pwm@5d010000 {
compatible = "fsl,imx27-pwm";
reg = <0x5d010000 0x10000>;
clock-names = "ipg", "per";
- clocks = <&pwm1_lpcg 4>,
- <&pwm1_lpcg 1>;
+ clocks = <&pwm1_lpcg IMX_LPCG_CLK_6>,
+ <&pwm1_lpcg IMX_LPCG_CLK_1>;
assigned-clocks = <&clk IMX_SC_R_PWM_1 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
#pwm-cells = <3>;
@@ -58,8 +58,8 @@ lsio_pwm2: pwm@5d020000 {
compatible = "fsl,imx27-pwm";
reg = <0x5d020000 0x10000>;
clock-names = "ipg", "per";
- clocks = <&pwm2_lpcg 4>,
- <&pwm2_lpcg 1>;
+ clocks = <&pwm2_lpcg IMX_LPCG_CLK_6>,
+ <&pwm2_lpcg IMX_LPCG_CLK_1>;
assigned-clocks = <&clk IMX_SC_R_PWM_2 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
#pwm-cells = <3>;
@@ -71,8 +71,8 @@ lsio_pwm3: pwm@5d030000 {
compatible = "fsl,imx27-pwm";
reg = <0x5d030000 0x10000>;
clock-names = "ipg", "per";
- clocks = <&pwm3_lpcg 4>,
- <&pwm3_lpcg 1>;
+ clocks = <&pwm3_lpcg IMX_LPCG_CLK_6>,
+ <&pwm3_lpcg IMX_LPCG_CLK_1>;
assigned-clocks = <&clk IMX_SC_R_PWM_3 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
#pwm-cells = <3>;
diff --git a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi
index e9b198c13b2f..d896135f31fc 100644
--- a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi
@@ -49,15 +49,15 @@ &flexcan1 {
};

&flexcan2 {
- clocks = <&can1_lpcg 1>,
- <&can1_lpcg 0>;
+ clocks = <&can1_lpcg IMX_LPCG_CLK_4>,
+ <&can1_lpcg IMX_LPCG_CLK_0>;
assigned-clocks = <&clk IMX_SC_R_CAN_1 IMX_SC_PM_CLK_PER>;
fsl,clk-source = /bits/ 8 <1>;
};

&flexcan3 {
- clocks = <&can2_lpcg 1>,
- <&can2_lpcg 0>;
+ clocks = <&can2_lpcg IMX_LPCG_CLK_4>,
+ <&can2_lpcg IMX_LPCG_CLK_0>;
assigned-clocks = <&clk IMX_SC_R_CAN_2 IMX_SC_PM_CLK_PER>;
fsl,clk-source = /bits/ 8 <1>;
};
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b4e6859542a3..0ca3130c6c8f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2566,31 +2566,16 @@ config MITIGATION_RFDS
stored in floating point, vector and integer registers.
See also <file:Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst>

-choice
- prompt "Clear branch history"
+config MITIGATION_SPECTRE_BHI
+ bool "Mitigate Spectre-BHB (Branch History Injection)"
depends on CPU_SUP_INTEL
- default SPECTRE_BHI_ON
+ default y
help
Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks
where the branch history buffer is poisoned to speculatively steer
indirect branches.
See <file:Documentation/admin-guide/hw-vuln/spectre.rst>

-config SPECTRE_BHI_ON
- bool "on"
- help
- Equivalent to setting spectre_bhi=on command line parameter.
-config SPECTRE_BHI_OFF
- bool "off"
- help
- Equivalent to setting spectre_bhi=off command line parameter.
-config SPECTRE_BHI_AUTO
- bool "auto"
- help
- Equivalent to setting spectre_bhi=auto command line parameter.
-
-endchoice
-
endif

config ARCH_HAS_ADD_PAGES
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 185f902e5f28..c688cb22dcd6 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1644,6 +1644,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
while (++i < cpuc->n_events) {
cpuc->event_list[i-1] = cpuc->event_list[i];
cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
+ cpuc->assign[i-1] = cpuc->assign[i];
}
cpuc->event_constraint[i-1] = NULL;
--cpuc->n_events;
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 5af4ec1a0f71..33aa0c31c21c 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -13,6 +13,7 @@
#include <asm/mpspec.h>
#include <asm/msr.h>
#include <asm/hardirq.h>
+#include <asm/io.h>

#define ARCH_APICTIMER_STOPS_ON_C3 1

@@ -96,7 +97,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v)

static inline u32 native_apic_mem_read(u32 reg)
{
- return *((volatile u32 *)(APIC_BASE + reg));
+ return readl((void __iomem *)(APIC_BASE + reg));
}

static inline void native_apic_mem_eoi(void)
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 3cdf48493546..ab88a27ff433 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1724,11 +1724,11 @@ static int x2apic_state;

static bool x2apic_hw_locked(void)
{
- u64 ia32_cap;
+ u64 x86_arch_cap_msr;
u64 msr;

- ia32_cap = x86_read_arch_cap_msr();
- if (ia32_cap & ARCH_CAP_XAPIC_DISABLE) {
+ x86_arch_cap_msr = x86_read_arch_cap_msr();
+ if (x86_arch_cap_msr & ARCH_CAP_XAPIC_DISABLE) {
rdmsrl(MSR_IA32_XAPIC_DISABLE_STATUS, msr);
return (msr & LEGACY_XAPIC_DISABLED);
}
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 3fc230155627..5ff69b1d39b2 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -61,6 +61,8 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_current);
u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB;
EXPORT_SYMBOL_GPL(x86_pred_cmd);

+static u64 __ro_after_init x86_arch_cap_msr;
+
static DEFINE_MUTEX(spec_ctrl_mutex);

void (*x86_return_thunk)(void) __ro_after_init = __x86_return_thunk;
@@ -144,6 +146,8 @@ void __init cpu_select_mitigations(void)
x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK;
}

+ x86_arch_cap_msr = x86_read_arch_cap_msr();
+
/* Select the proper CPU mitigations before patching alternatives: */
spectre_v1_select_mitigation();
spectre_v2_select_mitigation();
@@ -301,8 +305,6 @@ static const char * const taa_strings[] = {

static void __init taa_select_mitigation(void)
{
- u64 ia32_cap;
-
if (!boot_cpu_has_bug(X86_BUG_TAA)) {
taa_mitigation = TAA_MITIGATION_OFF;
return;
@@ -341,9 +343,8 @@ static void __init taa_select_mitigation(void)
* On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode
* update is required.
*/
- ia32_cap = x86_read_arch_cap_msr();
- if ( (ia32_cap & ARCH_CAP_MDS_NO) &&
- !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR))
+ if ( (x86_arch_cap_msr & ARCH_CAP_MDS_NO) &&
+ !(x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR))
taa_mitigation = TAA_MITIGATION_UCODE_NEEDED;

/*
@@ -401,8 +402,6 @@ static const char * const mmio_strings[] = {

static void __init mmio_select_mitigation(void)
{
- u64 ia32_cap;
-
if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) ||
boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) ||
cpu_mitigations_off()) {
@@ -413,8 +412,6 @@ static void __init mmio_select_mitigation(void)
if (mmio_mitigation == MMIO_MITIGATION_OFF)
return;

- ia32_cap = x86_read_arch_cap_msr();
-
/*
* Enable CPU buffer clear mitigation for host and VMM, if also affected
* by MDS or TAA. Otherwise, enable mitigation for VMM only.
@@ -437,7 +434,7 @@ static void __init mmio_select_mitigation(void)
* be propagated to uncore buffers, clearing the Fill buffers on idle
* is required irrespective of SMT state.
*/
- if (!(ia32_cap & ARCH_CAP_FBSDP_NO))
+ if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO))
static_branch_enable(&mds_idle_clear);

/*
@@ -447,10 +444,10 @@ static void __init mmio_select_mitigation(void)
* FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS
* affected systems.
*/
- if ((ia32_cap & ARCH_CAP_FB_CLEAR) ||
+ if ((x86_arch_cap_msr & ARCH_CAP_FB_CLEAR) ||
(boot_cpu_has(X86_FEATURE_MD_CLEAR) &&
boot_cpu_has(X86_FEATURE_FLUSH_L1D) &&
- !(ia32_cap & ARCH_CAP_MDS_NO)))
+ !(x86_arch_cap_msr & ARCH_CAP_MDS_NO)))
mmio_mitigation = MMIO_MITIGATION_VERW;
else
mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED;
@@ -508,7 +505,7 @@ static void __init rfds_select_mitigation(void)
if (rfds_mitigation == RFDS_MITIGATION_OFF)
return;

- if (x86_read_arch_cap_msr() & ARCH_CAP_RFDS_CLEAR)
+ if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR)
setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
else
rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED;
@@ -659,8 +656,6 @@ void update_srbds_msr(void)

static void __init srbds_select_mitigation(void)
{
- u64 ia32_cap;
-
if (!boot_cpu_has_bug(X86_BUG_SRBDS))
return;

@@ -669,8 +664,7 @@ static void __init srbds_select_mitigation(void)
* are only exposed to SRBDS when TSX is enabled or when CPU is affected
* by Processor MMIO Stale Data vulnerability.
*/
- ia32_cap = x86_read_arch_cap_msr();
- if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) &&
+ if ((x86_arch_cap_msr & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) &&
!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
srbds_mitigation = SRBDS_MITIGATION_TSX_OFF;
else if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
@@ -813,7 +807,7 @@ static void __init gds_select_mitigation(void)
/* Will verify below that mitigation _can_ be disabled */

/* No microcode */
- if (!(x86_read_arch_cap_msr() & ARCH_CAP_GDS_CTRL)) {
+ if (!(x86_arch_cap_msr & ARCH_CAP_GDS_CTRL)) {
if (gds_mitigation == GDS_MITIGATION_FORCE) {
/*
* This only needs to be done on the boot CPU so do it
@@ -1543,20 +1537,25 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void)
return SPECTRE_V2_RETPOLINE;
}

+static bool __ro_after_init rrsba_disabled;
+
/* Disable in-kernel use of non-RSB RET predictors */
static void __init spec_ctrl_disable_kernel_rrsba(void)
{
- u64 ia32_cap;
+ if (rrsba_disabled)
+ return;

- if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL))
+ if (!(x86_arch_cap_msr & ARCH_CAP_RRSBA)) {
+ rrsba_disabled = true;
return;
+ }

- ia32_cap = x86_read_arch_cap_msr();
+ if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL))
+ return;

- if (ia32_cap & ARCH_CAP_RRSBA) {
- x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
- update_spec_ctrl(x86_spec_ctrl_base);
- }
+ x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
+ update_spec_ctrl(x86_spec_ctrl_base);
+ rrsba_disabled = true;
}

static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode)
@@ -1625,13 +1624,10 @@ static bool __init spec_ctrl_bhi_dis(void)
enum bhi_mitigations {
BHI_MITIGATION_OFF,
BHI_MITIGATION_ON,
- BHI_MITIGATION_AUTO,
};

static enum bhi_mitigations bhi_mitigation __ro_after_init =
- IS_ENABLED(CONFIG_SPECTRE_BHI_ON) ? BHI_MITIGATION_ON :
- IS_ENABLED(CONFIG_SPECTRE_BHI_OFF) ? BHI_MITIGATION_OFF :
- BHI_MITIGATION_AUTO;
+ IS_ENABLED(CONFIG_MITIGATION_SPECTRE_BHI) ? BHI_MITIGATION_ON : BHI_MITIGATION_OFF;

static int __init spectre_bhi_parse_cmdline(char *str)
{
@@ -1642,8 +1638,6 @@ static int __init spectre_bhi_parse_cmdline(char *str)
bhi_mitigation = BHI_MITIGATION_OFF;
else if (!strcmp(str, "on"))
bhi_mitigation = BHI_MITIGATION_ON;
- else if (!strcmp(str, "auto"))
- bhi_mitigation = BHI_MITIGATION_AUTO;
else
pr_err("Ignoring unknown spectre_bhi option (%s)", str);

@@ -1657,9 +1651,11 @@ static void __init bhi_select_mitigation(void)
return;

/* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */
- if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) &&
- !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA))
- return;
+ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
+ spec_ctrl_disable_kernel_rrsba();
+ if (rrsba_disabled)
+ return;
+ }

if (spec_ctrl_bhi_dis())
return;
@@ -1671,9 +1667,6 @@ static void __init bhi_select_mitigation(void)
setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT);
pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n");

- if (bhi_mitigation == BHI_MITIGATION_AUTO)
- return;
-
/* Mitigate syscalls when the mitigation is forced =on */
setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP);
pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n");
@@ -1907,8 +1900,6 @@ static void update_indir_branch_cond(void)
/* Update the static key controlling the MDS CPU buffer clear in idle */
static void update_mds_branch_idle(void)
{
- u64 ia32_cap = x86_read_arch_cap_msr();
-
/*
* Enable the idle clearing if SMT is active on CPUs which are
* affected only by MSBDS and not any other MDS variant.
@@ -1923,7 +1914,7 @@ static void update_mds_branch_idle(void)
if (sched_smt_active()) {
static_branch_enable(&mds_idle_clear);
} else if (mmio_mitigation == MMIO_MITIGATION_OFF ||
- (ia32_cap & ARCH_CAP_FBSDP_NO)) {
+ (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) {
static_branch_disable(&mds_idle_clear);
}
}
@@ -2808,7 +2799,7 @@ static char *pbrsb_eibrs_state(void)
}
}

-static const char * const spectre_bhi_state(void)
+static const char *spectre_bhi_state(void)
{
if (!boot_cpu_has_bug(X86_BUG_BHI))
return "; BHI: Not affected";
@@ -2816,13 +2807,12 @@ static const char * const spectre_bhi_state(void)
return "; BHI: BHI_DIS_S";
else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP))
return "; BHI: SW loop, KVM: SW loop";
- else if (boot_cpu_has(X86_FEATURE_RETPOLINE) &&
- !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA))
+ else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && rrsba_disabled)
return "; BHI: Retpoline";
- else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT))
- return "; BHI: Syscall hardening, KVM: SW loop";
+ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT))
+ return "; BHI: Vulnerable, KVM: SW loop";

- return "; BHI: Vulnerable (Syscall hardening enabled)";
+ return "; BHI: Vulnerable";
}

static ssize_t spectre_v2_show_state(char *buf)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index fc4c9a7fb1e3..340dd6cc11af 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1329,25 +1329,25 @@ static bool __init cpu_matches(const struct x86_cpu_id *table, unsigned long whi

u64 x86_read_arch_cap_msr(void)
{
- u64 ia32_cap = 0;
+ u64 x86_arch_cap_msr = 0;

if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
- rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, x86_arch_cap_msr);

- return ia32_cap;
+ return x86_arch_cap_msr;
}

-static bool arch_cap_mmio_immune(u64 ia32_cap)
+static bool arch_cap_mmio_immune(u64 x86_arch_cap_msr)
{
- return (ia32_cap & ARCH_CAP_FBSDP_NO &&
- ia32_cap & ARCH_CAP_PSDP_NO &&
- ia32_cap & ARCH_CAP_SBDR_SSDP_NO);
+ return (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO &&
+ x86_arch_cap_msr & ARCH_CAP_PSDP_NO &&
+ x86_arch_cap_msr & ARCH_CAP_SBDR_SSDP_NO);
}

-static bool __init vulnerable_to_rfds(u64 ia32_cap)
+static bool __init vulnerable_to_rfds(u64 x86_arch_cap_msr)
{
/* The "immunity" bit trumps everything else: */
- if (ia32_cap & ARCH_CAP_RFDS_NO)
+ if (x86_arch_cap_msr & ARCH_CAP_RFDS_NO)
return false;

/*
@@ -1355,7 +1355,7 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap)
* indicate that mitigation is needed because guest is running on a
* vulnerable hardware or may migrate to such hardware:
*/
- if (ia32_cap & ARCH_CAP_RFDS_CLEAR)
+ if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR)
return true;

/* Only consult the blacklist when there is no enumeration: */
@@ -1364,11 +1364,11 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap)

static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
{
- u64 ia32_cap = x86_read_arch_cap_msr();
+ u64 x86_arch_cap_msr = x86_read_arch_cap_msr();

/* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */
if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) &&
- !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO))
+ !(x86_arch_cap_msr & ARCH_CAP_PSCHANGE_MC_NO))
setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT);

if (cpu_matches(cpu_vuln_whitelist, NO_SPECULATION))
@@ -1380,7 +1380,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
setup_force_cpu_bug(X86_BUG_SPECTRE_V2);

if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) &&
- !(ia32_cap & ARCH_CAP_SSB_NO) &&
+ !(x86_arch_cap_msr & ARCH_CAP_SSB_NO) &&
!cpu_has(c, X86_FEATURE_AMD_SSB_NO))
setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);

@@ -1388,15 +1388,15 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
* AMD's AutoIBRS is equivalent to Intel's eIBRS - use the Intel feature
* flag and protect from vendor-specific bugs via the whitelist.
*/
- if ((ia32_cap & ARCH_CAP_IBRS_ALL) || cpu_has(c, X86_FEATURE_AUTOIBRS)) {
+ if ((x86_arch_cap_msr & ARCH_CAP_IBRS_ALL) || cpu_has(c, X86_FEATURE_AUTOIBRS)) {
setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED);
if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
- !(ia32_cap & ARCH_CAP_PBRSB_NO))
+ !(x86_arch_cap_msr & ARCH_CAP_PBRSB_NO))
setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
}

if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) &&
- !(ia32_cap & ARCH_CAP_MDS_NO)) {
+ !(x86_arch_cap_msr & ARCH_CAP_MDS_NO)) {
setup_force_cpu_bug(X86_BUG_MDS);
if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY))
setup_force_cpu_bug(X86_BUG_MSBDS_ONLY);
@@ -1415,9 +1415,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
* TSX_CTRL check alone is not sufficient for cases when the microcode
* update is not present or running as guest that don't get TSX_CTRL.
*/
- if (!(ia32_cap & ARCH_CAP_TAA_NO) &&
+ if (!(x86_arch_cap_msr & ARCH_CAP_TAA_NO) &&
(cpu_has(c, X86_FEATURE_RTM) ||
- (ia32_cap & ARCH_CAP_TSX_CTRL_MSR)))
+ (x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR)))
setup_force_cpu_bug(X86_BUG_TAA);

/*
@@ -1443,7 +1443,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
* Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist,
* nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits.
*/
- if (!arch_cap_mmio_immune(ia32_cap)) {
+ if (!arch_cap_mmio_immune(x86_arch_cap_msr)) {
if (cpu_matches(cpu_vuln_blacklist, MMIO))
setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO))
@@ -1451,7 +1451,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
}

if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
- if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
+ if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (x86_arch_cap_msr & ARCH_CAP_RSBA))
setup_force_cpu_bug(X86_BUG_RETBLEED);
}

@@ -1469,15 +1469,15 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
* disabling AVX2. The only way to do this in HW is to clear XCR0[2],
* which means that AVX will be disabled.
*/
- if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) &&
+ if (cpu_matches(cpu_vuln_blacklist, GDS) && !(x86_arch_cap_msr & ARCH_CAP_GDS_NO) &&
boot_cpu_has(X86_FEATURE_AVX))
setup_force_cpu_bug(X86_BUG_GDS);

- if (vulnerable_to_rfds(ia32_cap))
+ if (vulnerable_to_rfds(x86_arch_cap_msr))
setup_force_cpu_bug(X86_BUG_RFDS);

/* When virtualized, eIBRS could be hidden, assume vulnerable */
- if (!(ia32_cap & ARCH_CAP_BHI_NO) &&
+ if (!(x86_arch_cap_msr & ARCH_CAP_BHI_NO) &&
!cpu_matches(cpu_vuln_whitelist, NO_BHI) &&
(boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) ||
boot_cpu_has(X86_FEATURE_HYPERVISOR)))
@@ -1487,7 +1487,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
return;

/* Rogue Data Cache Load? No! */
- if (ia32_cap & ARCH_CAP_RDCL_NO)
+ if (x86_arch_cap_msr & ARCH_CAP_RDCL_NO)
return;

setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 4b48c2c44098..4c49a70b46bd 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1409,6 +1409,12 @@ static int blkcg_css_online(struct cgroup_subsys_state *css)
return 0;
}

+void blkg_init_queue(struct request_queue *q)
+{
+ INIT_LIST_HEAD(&q->blkg_list);
+ mutex_init(&q->blkcg_mutex);
+}
+
int blkcg_init_disk(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
@@ -1416,9 +1422,6 @@ int blkcg_init_disk(struct gendisk *disk)
bool preloaded;
int ret;

- INIT_LIST_HEAD(&q->blkg_list);
- mutex_init(&q->blkcg_mutex);
-
new_blkg = blkg_alloc(&blkcg_root, disk, GFP_KERNEL);
if (!new_blkg)
return -ENOMEM;
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index b927a4a0ad03..5b0bdc268ade 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -188,6 +188,7 @@ struct blkcg_policy {
extern struct blkcg blkcg_root;
extern bool blkcg_debug_stats;

+void blkg_init_queue(struct request_queue *q);
int blkcg_init_disk(struct gendisk *disk);
void blkcg_exit_disk(struct gendisk *disk);

@@ -481,6 +482,7 @@ struct blkcg {
};

static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
+static inline void blkg_init_queue(struct request_queue *q) { }
static inline int blkcg_init_disk(struct gendisk *disk) { return 0; }
static inline void blkcg_exit_disk(struct gendisk *disk) { }
static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
diff --git a/block/blk-core.c b/block/blk-core.c
index 2eca76ccf4ee..a3726d8cf873 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -430,6 +430,8 @@ struct request_queue *blk_alloc_queue(int node_id)
init_waitqueue_head(&q->mq_freeze_wq);
mutex_init(&q->mq_freeze_lock);

+ blkg_init_queue(q);
+
/*
* Init percpu_ref in atomic mode so that it's faster to shutdown.
* See blk_register_queue() for details.
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index 8fb70e3c7b9c..5619980d9edd 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -517,7 +517,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
vdev->context_xa_limit.min = IVPU_USER_CONTEXT_MIN_SSID;
vdev->context_xa_limit.max = IVPU_USER_CONTEXT_MAX_SSID;
atomic64_set(&vdev->unique_id_counter, 0);
- xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC);
+ xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1);
lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 597d75baa1cc..5c4e353448f5 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1798,7 +1798,8 @@ static void acpi_scan_dep_init(struct acpi_device *adev)
if (dep->honor_dep)
adev->flags.honor_deps = 1;

- adev->dep_unmet++;
+ if (!dep->met)
+ adev->dep_unmet++;
}
}
}
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index f12beeb96629..a492f23faa87 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2493,7 +2493,7 @@ static void ata_dev_config_cdl(struct ata_device *dev)
bool cdl_enabled;
u64 val;

- if (ata_id_major_version(dev->id) < 12)
+ if (ata_id_major_version(dev->id) < 11)
goto not_supported;

if (!ata_log_supported(dev, ATA_LOG_IDENTIFY_DEVICE) ||
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 842433659e6d..0e078bf5aba0 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -4780,7 +4780,7 @@ void ata_scsi_dev_rescan(struct work_struct *work)
* bail out.
*/
if (ap->pflags & ATA_PFLAG_SUSPENDED)
- goto unlock;
+ goto unlock_ap;

if (!sdev)
continue;
@@ -4793,7 +4793,7 @@ void ata_scsi_dev_rescan(struct work_struct *work)
if (do_resume) {
ret = scsi_resume_device(sdev);
if (ret == -EWOULDBLOCK)
- goto unlock;
+ goto unlock_scan;
dev->flags &= ~ATA_DFLAG_RESUMING;
}
ret = scsi_rescan_device(sdev);
@@ -4801,12 +4801,13 @@ void ata_scsi_dev_rescan(struct work_struct *work)
spin_lock_irqsave(ap->lock, flags);

if (ret)
- goto unlock;
+ goto unlock_ap;
}
}

-unlock:
+unlock_ap:
spin_unlock_irqrestore(ap->lock, flags);
+unlock_scan:
mutex_unlock(&ap->scsi_scan_mutex);

/* Reschedule with a delay if scsi_rescan_device() returned an error */
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index b12986b968da..4b4c15e94338 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -928,7 +928,7 @@ static int cxl_clear_event_record(struct cxl_memdev_state *mds,
for (cnt = 0; cnt < total; cnt++) {
payload->handles[i++] = get_pl->records[cnt].hdr.handle;
dev_dbg(mds->cxlds.dev, "Event log '%d': Clearing %u\n", log,
- le16_to_cpu(payload->handles[i]));
+ le16_to_cpu(payload->handles[i - 1]));

if (i == max_handles) {
payload->nr_recs = i;
@@ -971,13 +971,14 @@ static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
.payload_in = &log_type,
.size_in = sizeof(log_type),
.payload_out = payload,
- .size_out = mds->payload_size,
.min_out = struct_size(payload, records, 0),
};

do {
int rc, i;

+ mbox_cmd.size_out = mds->payload_size;
+
rc = cxl_internal_send_cmd(mds, &mbox_cmd);
if (rc) {
dev_err_ratelimited(dev,
diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
index e0fbe964f6f0..bab4592db647 100644
--- a/drivers/cxl/core/regs.c
+++ b/drivers/cxl/core/regs.c
@@ -271,6 +271,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_map_device_regs, CXL);
static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi,
struct cxl_register_map *map)
{
+ u8 reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo);
int bar = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo);
u64 offset = ((u64)reg_hi << 32) |
(reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK);
@@ -278,11 +279,11 @@ static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi,
if (offset > pci_resource_len(pdev, bar)) {
dev_warn(&pdev->dev,
"BAR%d: %pr: too small (offset: %pa, type: %d)\n", bar,
- &pdev->resource[bar], &offset, map->reg_type);
+ &pdev->resource[bar], &offset, reg_type);
return false;
}

- map->reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo);
+ map->reg_type = reg_type;
map->resource = pci_resource_start(pdev, bar) + offset;
map->max_size = pci_resource_len(pdev, bar) - offset;
return true;
diff --git a/drivers/firmware/arm_scmi/raw_mode.c b/drivers/firmware/arm_scmi/raw_mode.c
index 350573518503..130d13e9cd6b 100644
--- a/drivers/firmware/arm_scmi/raw_mode.c
+++ b/drivers/firmware/arm_scmi/raw_mode.c
@@ -921,7 +921,7 @@ static int scmi_dbg_raw_mode_open(struct inode *inode, struct file *filp)
rd->raw = raw;
filp->private_data = rd;

- return 0;
+ return nonseekable_open(inode, filp);
}

static int scmi_dbg_raw_mode_release(struct inode *inode, struct file *filp)
@@ -950,6 +950,7 @@ static const struct file_operations scmi_dbg_raw_mode_reset_fops = {
.open = scmi_dbg_raw_mode_open,
.release = scmi_dbg_raw_mode_release,
.write = scmi_dbg_raw_mode_reset_write,
+ .llseek = no_llseek,
.owner = THIS_MODULE,
};

@@ -959,6 +960,7 @@ static const struct file_operations scmi_dbg_raw_mode_message_fops = {
.read = scmi_dbg_raw_mode_message_read,
.write = scmi_dbg_raw_mode_message_write,
.poll = scmi_dbg_raw_mode_message_poll,
+ .llseek = no_llseek,
.owner = THIS_MODULE,
};

@@ -975,6 +977,7 @@ static const struct file_operations scmi_dbg_raw_mode_message_async_fops = {
.read = scmi_dbg_raw_mode_message_read,
.write = scmi_dbg_raw_mode_message_async_write,
.poll = scmi_dbg_raw_mode_message_poll,
+ .llseek = no_llseek,
.owner = THIS_MODULE,
};

@@ -998,6 +1001,7 @@ static const struct file_operations scmi_dbg_raw_mode_notification_fops = {
.release = scmi_dbg_raw_mode_release,
.read = scmi_test_dbg_raw_mode_notif_read,
.poll = scmi_test_dbg_raw_mode_notif_poll,
+ .llseek = no_llseek,
.owner = THIS_MODULE,
};

@@ -1021,6 +1025,7 @@ static const struct file_operations scmi_dbg_raw_mode_errors_fops = {
.release = scmi_dbg_raw_mode_release,
.read = scmi_test_dbg_raw_mode_errors_read,
.poll = scmi_test_dbg_raw_mode_errors_poll,
+ .llseek = no_llseek,
.owner = THIS_MODULE,
};

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index d0c3ec9f4fb6..c9058d58c95a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -1616,7 +1616,7 @@ static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa));
}

- active_rb_bitmap |= global_active_rb_bitmap;
+ active_rb_bitmap &= global_active_rb_bitmap;
adev->gfx.config.backend_enable_mask = active_rb_bitmap;
adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index 5a77ab587b59..4712ffc0a482 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -449,10 +449,8 @@ static bool soc21_need_full_reset(struct amdgpu_device *adev)
{
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(11, 0, 0):
- return amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC);
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
- return false;
default:
return true;
}
@@ -804,10 +802,35 @@ static int soc21_common_suspend(void *handle)
return soc21_common_hw_fini(adev);
}

+static bool soc21_need_reset_on_resume(struct amdgpu_device *adev)
+{
+ u32 sol_reg1, sol_reg2;
+
+ /* Will reset for the following suspend abort cases.
+ * 1) Only reset dGPU side.
+ * 2) S3 suspend got aborted and TOS is active.
+ */
+ if (!(adev->flags & AMD_IS_APU) && adev->in_s3 &&
+ !adev->suspend_complete) {
+ sol_reg1 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
+ msleep(100);
+ sol_reg2 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
+
+ return (sol_reg1 != sol_reg2);
+ }
+
+ return false;
+}
+
static int soc21_common_resume(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;

+ if (soc21_need_reset_on_resume(adev)) {
+ dev_info(adev->dev, "S3 suspend aborted, resetting...");
+ soc21_asic_reset(adev);
+ }
+
return soc21_common_hw_init(adev);
}

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index e07652e72496..60d98301ef04 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1980,6 +1980,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n");
while (halt_if_hws_hang)
schedule();
+ kfd_hws_hang(dqm);
return -ETIME;
}

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 9f4e4a01010f..3442e08f4787 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -6122,19 +6122,16 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
mod_build_hf_vsif_infopacket(stream, &stream->vsp_infopacket);

- if (stream->link->psr_settings.psr_feature_enabled || stream->link->replay_settings.replay_feature_enabled) {
+ if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT ||
+ stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST ||
+ stream->signal == SIGNAL_TYPE_EDP) {
//
// should decide stream support vsc sdp colorimetry capability
// before building vsc info packet
//
- stream->use_vsc_sdp_for_colorimetry = false;
- if (aconnector->dc_sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
- stream->use_vsc_sdp_for_colorimetry =
- aconnector->dc_sink->is_vsc_sdp_colorimetry_supported;
- } else {
- if (stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED)
- stream->use_vsc_sdp_for_colorimetry = true;
- }
+ stream->use_vsc_sdp_for_colorimetry = stream->link->dpcd_caps.dpcd_rev.raw >= 0x14 &&
+ stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED;
+
if (stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22)
tf = TRANSFER_FUNC_GAMMA_22;
mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space, tf);
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
index 09151cc56ce4..a13ead3d21e3 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
@@ -99,20 +99,25 @@ static int dcn316_get_active_display_cnt_wa(
return display_count;
}

-static void dcn316_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool disable)
+static void dcn316_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context,
+ bool safe_to_lower, bool disable)
{
struct dc *dc = clk_mgr_base->ctx->dc;
int i;

for (i = 0; i < dc->res_pool->pipe_count; ++i) {
- struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *pipe = safe_to_lower
+ ? &context->res_ctx.pipe_ctx[i]
+ : &dc->current_state->res_ctx.pipe_ctx[i];

if (pipe->top_pipe || pipe->prev_odm_pipe)
continue;
- if (pipe->stream && (pipe->stream->dpms_off || pipe->plane_state == NULL ||
- dc_is_virtual_signal(pipe->stream->signal))) {
+ if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) ||
+ !pipe->stream->link_enc)) {
if (disable) {
- pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
+ if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc)
+ pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
+
reset_sync_context_for_pipe(dc, context, i);
} else
pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg);
@@ -207,11 +212,11 @@ static void dcn316_update_clocks(struct clk_mgr *clk_mgr_base,
}

if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) {
- dcn316_disable_otg_wa(clk_mgr_base, context, true);
+ dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true);

clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
dcn316_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz);
- dcn316_disable_otg_wa(clk_mgr_base, context, false);
+ dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false);

update_dispclk = true;
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
index 626591f54bc4..1d1917e1b63f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
@@ -226,8 +226,18 @@ static int smu_v13_0_4_system_features_control(struct smu_context *smu, bool en)
struct amdgpu_device *adev = smu->adev;
int ret = 0;

- if (!en && !adev->in_s0ix)
+ if (!en && !adev->in_s0ix) {
+ /* Adds a GFX reset as workaround just before sending the
+ * MP1_UNLOAD message to prevent GC/RLC/PMFW from entering
+ * an invalid state.
+ */
+ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset,
+ SMU_RESET_MODE_2, NULL);
+ if (ret)
+ return ret;
+
ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL);
+ }

return ret;
}
diff --git a/drivers/gpu/drm/ast/ast_dp.c b/drivers/gpu/drm/ast/ast_dp.c
index fdd9a493aa9c..c6f226b6f081 100644
--- a/drivers/gpu/drm/ast/ast_dp.c
+++ b/drivers/gpu/drm/ast/ast_dp.c
@@ -180,6 +180,7 @@ void ast_dp_set_on_off(struct drm_device *dev, bool on)
{
struct ast_device *ast = to_ast_device(dev);
u8 video_on_off = on;
+ u32 i = 0;

// Video On/Off
ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xE3, (u8) ~AST_DP_VIDEO_ENABLE, on);
@@ -192,6 +193,8 @@ void ast_dp_set_on_off(struct drm_device *dev, bool on)
ASTDP_MIRROR_VIDEO_ENABLE) != video_on_off) {
// wait 1 ms
mdelay(1);
+ if (++i > 200)
+ break;
}
}
}
diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c
index 871e4e2129d6..0683a129b362 100644
--- a/drivers/gpu/drm/drm_client_modeset.c
+++ b/drivers/gpu/drm/drm_client_modeset.c
@@ -777,6 +777,7 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width,
unsigned int total_modes_count = 0;
struct drm_client_offset *offsets;
unsigned int connector_count = 0;
+ /* points to modes protected by mode_config.mutex */
struct drm_display_mode **modes;
struct drm_crtc **crtcs;
int i, ret = 0;
@@ -845,7 +846,6 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width,
drm_client_pick_crtcs(client, connectors, connector_count,
crtcs, modes, 0, width, height);
}
- mutex_unlock(&dev->mode_config.mutex);

drm_client_modeset_release(client);

@@ -875,6 +875,7 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width,
modeset->y = offset->y;
}
}
+ mutex_unlock(&dev->mode_config.mutex);

mutex_unlock(&client->modeset_mutex);
out:
diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
index f99cf8037bd6..5aa6b998a1cb 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -2462,7 +2462,7 @@ intel_set_cdclk_pre_plane_update(struct intel_atomic_state *state)
if (IS_DG2(i915))
intel_cdclk_pcode_pre_notify(state);

- if (pipe == INVALID_PIPE ||
+ if (new_cdclk_state->disable_pipes ||
old_cdclk_state->actual.cdclk <= new_cdclk_state->actual.cdclk) {
drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed);

@@ -2494,7 +2494,7 @@ intel_set_cdclk_post_plane_update(struct intel_atomic_state *state)
if (IS_DG2(i915))
intel_cdclk_pcode_post_notify(state);

- if (pipe != INVALID_PIPE &&
+ if (!new_cdclk_state->disable_pipes &&
old_cdclk_state->actual.cdclk > new_cdclk_state->actual.cdclk) {
drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed);

@@ -2946,6 +2946,7 @@ static struct intel_global_state *intel_cdclk_duplicate_state(struct intel_globa
return NULL;

cdclk_state->pipe = INVALID_PIPE;
+ cdclk_state->disable_pipes = false;

return &cdclk_state->base;
}
@@ -3124,6 +3125,8 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state *state)
if (ret)
return ret;

+ new_cdclk_state->disable_pipes = true;
+
drm_dbg_kms(&dev_priv->drm,
"Modeset required for cdclk change\n");
}
diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h
index 48fd7d39e0cd..71bc032bfef1 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.h
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.h
@@ -51,6 +51,9 @@ struct intel_cdclk_state {

/* bitmask of active pipes */
u8 active_pipes;
+
+ /* update cdclk with pipes disabled */
+ bool disable_pipes;
};

int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state);
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index c7e00f57cb7a..b347f9062349 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -4111,7 +4111,12 @@ static bool m_n_equal(const struct intel_link_m_n *m_n_1,
static bool crtcs_port_sync_compatible(const struct intel_crtc_state *crtc_state1,
const struct intel_crtc_state *crtc_state2)
{
+ /*
+ * FIXME the modeset sequence is currently wrong and
+ * can't deal with bigjoiner + port sync at the same time.
+ */
return crtc_state1->hw.active && crtc_state2->hw.active &&
+ !crtc_state1->bigjoiner_pipes && !crtc_state2->bigjoiner_pipes &&
crtc_state1->output_types == crtc_state2->output_types &&
crtc_state1->output_format == crtc_state2->output_format &&
crtc_state1->lane_count == crtc_state2->lane_count &&
diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c
index 88e4759b538b..b844bdd16de9 100644
--- a/drivers/gpu/drm/i915/display/intel_vrr.c
+++ b/drivers/gpu/drm/i915/display/intel_vrr.c
@@ -111,6 +111,13 @@ intel_vrr_compute_config(struct intel_crtc_state *crtc_state,
if (!intel_vrr_is_capable(connector))
return;

+ /*
+ * FIXME all joined pipes share the same transcoder.
+ * Need to account for that during VRR toggle/push/etc.
+ */
+ if (crtc_state->bigjoiner_pipes)
+ return;
+
if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE)
return;

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
index ef871239adb2..68fae048a9a8 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
@@ -459,15 +459,15 @@ int dpu_core_perf_debugfs_init(struct dpu_kms *dpu_kms, struct dentry *parent)
&perf->core_clk_rate);
debugfs_create_u32("enable_bw_release", 0600, entry,
(u32 *)&perf->enable_bw_release);
- debugfs_create_u32("threshold_low", 0600, entry,
+ debugfs_create_u32("threshold_low", 0400, entry,
(u32 *)&perf->perf_cfg->max_bw_low);
- debugfs_create_u32("threshold_high", 0600, entry,
+ debugfs_create_u32("threshold_high", 0400, entry,
(u32 *)&perf->perf_cfg->max_bw_high);
- debugfs_create_u32("min_core_ib", 0600, entry,
+ debugfs_create_u32("min_core_ib", 0400, entry,
(u32 *)&perf->perf_cfg->min_core_ib);
- debugfs_create_u32("min_llcc_ib", 0600, entry,
+ debugfs_create_u32("min_llcc_ib", 0400, entry,
(u32 *)&perf->perf_cfg->min_llcc_ib);
- debugfs_create_u32("min_dram_ib", 0600, entry,
+ debugfs_create_u32("min_dram_ib", 0400, entry,
(u32 *)&perf->perf_cfg->min_dram_ib);
debugfs_create_file("perf_mode", 0600, entry,
(u32 *)perf, &dpu_core_perf_mode_fops);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c
index 4bf486b57101..cb05f7f48a98 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c
@@ -66,11 +66,16 @@ of_init(struct nvkm_bios *bios, const char *name)
return ERR_PTR(-EINVAL);
}

+static void of_fini(void *p)
+{
+ kfree(p);
+}
+
const struct nvbios_source
nvbios_of = {
.name = "OpenFirmware",
.init = of_init,
- .fini = (void(*)(void *))kfree,
+ .fini = of_fini,
.read = of_read,
.size = of_size,
.rw = false,
diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
index c0123d09f699..83fa384f6a24 100644
--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -500,11 +500,18 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
mapping_set_unevictable(mapping);

for (i = page_offset; i < page_offset + NUM_FAULT_PAGES; i++) {
+ /* Can happen if the last fault only partially filled this
+ * section of the pages array before failing. In that case
+ * we skip already filled pages.
+ */
+ if (pages[i])
+ continue;
+
pages[i] = shmem_read_mapping_page(mapping, i);
if (IS_ERR(pages[i])) {
ret = PTR_ERR(pages[i]);
pages[i] = NULL;
- goto err_pages;
+ goto err_unlock;
}
}

@@ -512,7 +519,7 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
ret = sg_alloc_table_from_pages(sgt, pages + page_offset,
NUM_FAULT_PAGES, 0, SZ_2M, GFP_KERNEL);
if (ret)
- goto err_pages;
+ goto err_unlock;

ret = dma_map_sgtable(pfdev->dev, sgt, DMA_BIDIRECTIONAL, 0);
if (ret)
@@ -534,8 +541,6 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,

err_map:
sg_free_table(sgt);
-err_pages:
- drm_gem_shmem_put_pages(&bo->base);
err_unlock:
dma_resv_unlock(obj->resv);
err_bo:
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index 368d26da0d6a..9febc8b73f09 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -58,16 +58,56 @@ static long qxl_fence_wait(struct dma_fence *fence, bool intr,
signed long timeout)
{
struct qxl_device *qdev;
+ struct qxl_release *release;
+ int count = 0, sc = 0;
+ bool have_drawable_releases;
unsigned long cur, end = jiffies + timeout;

qdev = container_of(fence->lock, struct qxl_device, release_lock);
+ release = container_of(fence, struct qxl_release, base);
+ have_drawable_releases = release->type == QXL_RELEASE_DRAWABLE;

- if (!wait_event_timeout(qdev->release_event,
- (dma_fence_is_signaled(fence) ||
- (qxl_io_notify_oom(qdev), 0)),
- timeout))
- return 0;
+retry:
+ sc++;
+
+ if (dma_fence_is_signaled(fence))
+ goto signaled;
+
+ qxl_io_notify_oom(qdev);
+
+ for (count = 0; count < 11; count++) {
+ if (!qxl_queue_garbage_collect(qdev, true))
+ break;
+
+ if (dma_fence_is_signaled(fence))
+ goto signaled;
+ }
+
+ if (dma_fence_is_signaled(fence))
+ goto signaled;
+
+ if (have_drawable_releases || sc < 4) {
+ if (sc > 2)
+ /* back off */
+ usleep_range(500, 1000);
+
+ if (time_after(jiffies, end))
+ return 0;
+
+ if (have_drawable_releases && sc > 300) {
+ DMA_FENCE_WARN(fence,
+ "failed to wait on release %llu after spincount %d\n",
+ fence->context & ~0xf0000000, sc);
+ goto signaled;
+ }
+ goto retry;
+ }
+ /*
+ * yeah, original sync_obj_wait gave up after 3 spins when
+ * have_drawable_releases is not set.
+ */

+signaled:
cur = jiffies;
if (time_after(cur, end))
return 0;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index c7d90f96d16a..0a304706e013 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -666,11 +666,12 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv)
[vmw_dma_map_populate] = "Caching DMA mappings.",
[vmw_dma_map_bind] = "Giving up DMA mappings early."};

- /* TTM currently doesn't fully support SEV encryption. */
- if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
- return -EINVAL;
-
- if (vmw_force_coherent)
+ /*
+ * When running with SEV we always want dma mappings, because
+ * otherwise ttm tt pool pages will bounce through swiotlb running
+ * out of available space.
+ */
+ if (vmw_force_coherent || cc_platform_has(CC_ATTR_MEM_ENCRYPT))
dev_priv->map_mode = vmw_dma_alloc_coherent;
else if (vmw_restrict_iommu)
dev_priv->map_mode = vmw_dma_map_bind;
diff --git a/drivers/iommu/intel/perfmon.c b/drivers/iommu/intel/perfmon.c
index cf43e798eca4..44083d01852d 100644
--- a/drivers/iommu/intel/perfmon.c
+++ b/drivers/iommu/intel/perfmon.c
@@ -438,7 +438,7 @@ static int iommu_pmu_assign_event(struct iommu_pmu *iommu_pmu,
iommu_pmu_set_filter(domain, event->attr.config1,
IOMMU_PMU_FILTER_DOMAIN, idx,
event->attr.config1);
- iommu_pmu_set_filter(pasid, event->attr.config1,
+ iommu_pmu_set_filter(pasid, event->attr.config2,
IOMMU_PMU_FILTER_PASID, idx,
event->attr.config1);
iommu_pmu_set_filter(ats, event->attr.config2,
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index ac12f76c1212..6010b93c514c 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -67,7 +67,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
struct page *pages;
int irq, ret;

- pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
+ pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
if (!pages) {
pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
iommu->name);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 911670273d1b..cc02e7ec72c0 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1473,7 +1473,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
for (j = 0; j < i; j++)
if (r1_bio->bios[j])
rdev_dec_pending(conf->mirrors[j].rdev, mddev);
- free_r1bio(r1_bio);
+ mempool_free(r1_bio, &conf->r1bio_pool);
allow_barrier(conf, bio->bi_iter.bi_sector);

if (bio->bi_opf & REQ_NOWAIT) {
diff --git a/drivers/media/cec/core/cec-adap.c b/drivers/media/cec/core/cec-adap.c
index 09ca83c23329..ed1fa07e70b5 100644
--- a/drivers/media/cec/core/cec-adap.c
+++ b/drivers/media/cec/core/cec-adap.c
@@ -1124,20 +1124,6 @@ void cec_received_msg_ts(struct cec_adapter *adap,
if (valid_la && min_len) {
/* These messages have special length requirements */
switch (cmd) {
- case CEC_MSG_TIMER_STATUS:
- if (msg->msg[2] & 0x10) {
- switch (msg->msg[2] & 0xf) {
- case CEC_OP_PROG_INFO_NOT_ENOUGH_SPACE:
- case CEC_OP_PROG_INFO_MIGHT_NOT_BE_ENOUGH_SPACE:
- if (msg->len < 5)
- valid_la = false;
- break;
- }
- } else if ((msg->msg[2] & 0xf) == CEC_OP_PROG_ERROR_DUPLICATE) {
- if (msg->len < 5)
- valid_la = false;
- }
- break;
case CEC_MSG_RECORD_ON:
switch (msg->msg[2]) {
case CEC_OP_RECORD_SRC_OWN:
diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
index 9fb8995b43a1..13fa8588e38c 100644
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c
@@ -1119,10 +1119,25 @@ static void mmc_omap_set_power(struct mmc_omap_slot *slot, int power_on,

host = slot->host;

- if (slot->vsd)
- gpiod_set_value(slot->vsd, power_on);
- if (slot->vio)
- gpiod_set_value(slot->vio, power_on);
+ if (power_on) {
+ if (slot->vsd) {
+ gpiod_set_value(slot->vsd, power_on);
+ msleep(1);
+ }
+ if (slot->vio) {
+ gpiod_set_value(slot->vio, power_on);
+ msleep(1);
+ }
+ } else {
+ if (slot->vio) {
+ gpiod_set_value(slot->vio, power_on);
+ msleep(50);
+ }
+ if (slot->vsd) {
+ gpiod_set_value(slot->vsd, power_on);
+ msleep(50);
+ }
+ }

if (slot->pdata->set_power != NULL)
slot->pdata->set_power(mmc_dev(slot->mmc), slot->id, power_on,
@@ -1259,18 +1274,18 @@ static int mmc_omap_new_slot(struct mmc_omap_host *host, int id)
slot->pdata = &host->pdata->slots[id];

/* Check for some optional GPIO controls */
- slot->vsd = gpiod_get_index_optional(host->dev, "vsd",
- id, GPIOD_OUT_LOW);
+ slot->vsd = devm_gpiod_get_index_optional(host->dev, "vsd",
+ id, GPIOD_OUT_LOW);
if (IS_ERR(slot->vsd))
return dev_err_probe(host->dev, PTR_ERR(slot->vsd),
"error looking up VSD GPIO\n");
- slot->vio = gpiod_get_index_optional(host->dev, "vio",
- id, GPIOD_OUT_LOW);
+ slot->vio = devm_gpiod_get_index_optional(host->dev, "vio",
+ id, GPIOD_OUT_LOW);
if (IS_ERR(slot->vio))
return dev_err_probe(host->dev, PTR_ERR(slot->vio),
"error looking up VIO GPIO\n");
- slot->cover = gpiod_get_index_optional(host->dev, "cover",
- id, GPIOD_IN);
+ slot->cover = devm_gpiod_get_index_optional(host->dev, "cover",
+ id, GPIOD_IN);
if (IS_ERR(slot->cover))
return dev_err_probe(host->dev, PTR_ERR(slot->cover),
"error looking up cover switch GPIO\n");
@@ -1384,13 +1399,6 @@ static int mmc_omap_probe(struct platform_device *pdev)
if (IS_ERR(host->virt_base))
return PTR_ERR(host->virt_base);

- host->slot_switch = gpiod_get_optional(host->dev, "switch",
- GPIOD_OUT_LOW);
- if (IS_ERR(host->slot_switch))
- return dev_err_probe(host->dev, PTR_ERR(host->slot_switch),
- "error looking up slot switch GPIO\n");
-
-
INIT_WORK(&host->slot_release_work, mmc_omap_slot_release_work);
INIT_WORK(&host->send_stop_work, mmc_omap_send_stop_work);

@@ -1409,6 +1417,12 @@ static int mmc_omap_probe(struct platform_device *pdev)
host->dev = &pdev->dev;
platform_set_drvdata(pdev, host);

+ host->slot_switch = devm_gpiod_get_optional(host->dev, "switch",
+ GPIOD_OUT_LOW);
+ if (IS_ERR(host->slot_switch))
+ return dev_err_probe(host->dev, PTR_ERR(host->slot_switch),
+ "error looking up slot switch GPIO\n");
+
host->id = pdev->id;
host->irq = irq;
host->phys_base = res->start;
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index f529bc6f5680..88f081672f6f 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -999,20 +999,173 @@ static void mt7530_setup_port5(struct dsa_switch *ds, phy_interface_t interface)
mutex_unlock(&priv->reg_mutex);
}

-/* On page 205, section "8.6.3 Frame filtering" of the active standard, IEEE Std
- * 802.1Q™-2022, it is stated that frames with 01:80:C2:00:00:00-0F as MAC DA
- * must only be propagated to C-VLAN and MAC Bridge components. That means
- * VLAN-aware and VLAN-unaware bridges. On the switch designs with CPU ports,
- * these frames are supposed to be processed by the CPU (software). So we make
- * the switch only forward them to the CPU port. And if received from a CPU
- * port, forward to a single port. The software is responsible of making the
- * switch conform to the latter by setting a single port as destination port on
- * the special tag.
+/* In Clause 5 of IEEE Std 802-2014, two sublayers of the data link layer (DLL)
+ * of the Open Systems Interconnection basic reference model (OSI/RM) are
+ * described; the medium access control (MAC) and logical link control (LLC)
+ * sublayers. The MAC sublayer is the one facing the physical layer.
*
- * This switch intellectual property cannot conform to this part of the standard
- * fully. Whilst the REV_UN frame tag covers the remaining :04-0D and :0F MAC
- * DAs, it also includes :22-FF which the scope of propagation is not supposed
- * to be restricted for these MAC DAs.
+ * In 8.2 of IEEE Std 802.1Q-2022, the Bridge architecture is described. A
+ * Bridge component comprises a MAC Relay Entity for interconnecting the Ports
+ * of the Bridge, at least two Ports, and higher layer entities with at least a
+ * Spanning Tree Protocol Entity included.
+ *
+ * Each Bridge Port also functions as an end station and shall provide the MAC
+ * Service to an LLC Entity. Each instance of the MAC Service is provided to a
+ * distinct LLC Entity that supports protocol identification, multiplexing, and
+ * demultiplexing, for protocol data unit (PDU) transmission and reception by
+ * one or more higher layer entities.
+ *
+ * It is described in 8.13.9 of IEEE Std 802.1Q-2022 that in a Bridge, the LLC
+ * Entity associated with each Bridge Port is modeled as being directly
+ * connected to the attached Local Area Network (LAN).
+ *
+ * On the switch with CPU port architecture, CPU port functions as Management
+ * Port, and the Management Port functionality is provided by software which
+ * functions as an end station. Software is connected to an IEEE 802 LAN that is
+ * wholly contained within the system that incorporates the Bridge. Software
+ * provides access to the LLC Entity associated with each Bridge Port by the
+ * value of the source port field on the special tag on the frame received by
+ * software.
+ *
+ * We call frames that carry control information to determine the active
+ * topology and current extent of each Virtual Local Area Network (VLAN), i.e.,
+ * spanning tree or Shortest Path Bridging (SPB) and Multiple VLAN Registration
+ * Protocol Data Units (MVRPDUs), and frames from other link constrained
+ * protocols, such as Extensible Authentication Protocol over LAN (EAPOL) and
+ * Link Layer Discovery Protocol (LLDP), link-local frames. They are not
+ * forwarded by a Bridge. Permanently configured entries in the filtering
+ * database (FDB) ensure that such frames are discarded by the Forwarding
+ * Process. In 8.6.3 of IEEE Std 802.1Q-2022, this is described in detail:
+ *
+ * Each of the reserved MAC addresses specified in Table 8-1
+ * (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]) shall be
+ * permanently configured in the FDB in C-VLAN components and ERs.
+ *
+ * Each of the reserved MAC addresses specified in Table 8-2
+ * (01-80-C2-00-00-[01,02,03,04,05,06,07,08,09,0A,0E]) shall be permanently
+ * configured in the FDB in S-VLAN components.
+ *
+ * Each of the reserved MAC addresses specified in Table 8-3
+ * (01-80-C2-00-00-[01,02,04,0E]) shall be permanently configured in the FDB in
+ * TPMR components.
+ *
+ * The FDB entries for reserved MAC addresses shall specify filtering for all
+ * Bridge Ports and all VIDs. Management shall not provide the capability to
+ * modify or remove entries for reserved MAC addresses.
+ *
+ * The addresses in Table 8-1, Table 8-2, and Table 8-3 determine the scope of
+ * propagation of PDUs within a Bridged Network, as follows:
+ *
+ * The Nearest Bridge group address (01-80-C2-00-00-0E) is an address that no
+ * conformant Two-Port MAC Relay (TPMR) component, Service VLAN (S-VLAN)
+ * component, Customer VLAN (C-VLAN) component, or MAC Bridge can forward.
+ * PDUs transmitted using this destination address, or any other addresses
+ * that appear in Table 8-1, Table 8-2, and Table 8-3
+ * (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]), can
+ * therefore travel no further than those stations that can be reached via a
+ * single individual LAN from the originating station.
+ *
+ * The Nearest non-TPMR Bridge group address (01-80-C2-00-00-03), is an
+ * address that no conformant S-VLAN component, C-VLAN component, or MAC
+ * Bridge can forward; however, this address is relayed by a TPMR component.
+ * PDUs using this destination address, or any of the other addresses that
+ * appear in both Table 8-1 and Table 8-2 but not in Table 8-3
+ * (01-80-C2-00-00-[00,03,05,06,07,08,09,0A,0B,0C,0D,0F]), will be relayed by
+ * any TPMRs but will propagate no further than the nearest S-VLAN component,
+ * C-VLAN component, or MAC Bridge.
+ *
+ * The Nearest Customer Bridge group address (01-80-C2-00-00-00) is an address
+ * that no conformant C-VLAN component, MAC Bridge can forward; however, it is
+ * relayed by TPMR components and S-VLAN components. PDUs using this
+ * destination address, or any of the other addresses that appear in Table 8-1
+ * but not in either Table 8-2 or Table 8-3 (01-80-C2-00-00-[00,0B,0C,0D,0F]),
+ * will be relayed by TPMR components and S-VLAN components but will propagate
+ * no further than the nearest C-VLAN component or MAC Bridge.
+ *
+ * Because the LLC Entity associated with each Bridge Port is provided via CPU
+ * port, we must not filter these frames but forward them to CPU port.
+ *
+ * In a Bridge, the transmission Port is majorly decided by ingress and egress
+ * rules, FDB, and spanning tree Port State functions of the Forwarding Process.
+ * For link-local frames, only CPU port should be designated as destination port
+ * in the FDB, and the other functions of the Forwarding Process must not
+ * interfere with the decision of the transmission Port. We call this process
+ * trapping frames to CPU port.
+ *
+ * Therefore, on the switch with CPU port architecture, link-local frames must
+ * be trapped to CPU port, and certain link-local frames received by a Port of a
+ * Bridge comprising a TPMR component or an S-VLAN component must be excluded
+ * from it.
+ *
+ * A Bridge of the switch with CPU port architecture cannot comprise a Two-Port
+ * MAC Relay (TPMR) component as a TPMR component supports only a subset of the
+ * functionality of a MAC Bridge. A Bridge comprising two Ports (Management Port
+ * doesn't count) of this architecture will either function as a standard MAC
+ * Bridge or a standard VLAN Bridge.
+ *
+ * Therefore, a Bridge of this architecture can only comprise S-VLAN components,
+ * C-VLAN components, or MAC Bridge components. Since there's no TPMR component,
+ * we don't need to relay PDUs using the destination addresses specified on the
+ * Nearest non-TPMR section, and the proportion of the Nearest Customer Bridge
+ * section where they must be relayed by TPMR components.
+ *
+ * One option to trap link-local frames to CPU port is to add static FDB entries
+ * with CPU port designated as destination port. However, because that
+ * Independent VLAN Learning (IVL) is being used on every VID, each entry only
+ * applies to a single VLAN Identifier (VID). For a Bridge comprising a MAC
+ * Bridge component or a C-VLAN component, there would have to be 16 times 4096
+ * entries. This switch intellectual property can only hold a maximum of 2048
+ * entries. Using this option, there also isn't a mechanism to prevent
+ * link-local frames from being discarded when the spanning tree Port State of
+ * the reception Port is discarding.
+ *
+ * The remaining option is to utilise the BPC, RGAC1, RGAC2, RGAC3, and RGAC4
+ * registers. Whilst this applies to every VID, it doesn't contain all of the
+ * reserved MAC addresses without affecting the remaining Standard Group MAC
+ * Addresses. The REV_UN frame tag utilised using the RGAC4 register covers the
+ * remaining 01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F] destination
+ * addresses. It also includes the 01-80-C2-00-00-22 to 01-80-C2-00-00-FF
+ * destination addresses which may be relayed by MAC Bridges or VLAN Bridges.
+ * The latter option provides better but not complete conformance.
+ *
+ * This switch intellectual property also does not provide a mechanism to trap
+ * link-local frames with specific destination addresses to CPU port by Bridge,
+ * to conform to the filtering rules for the distinct Bridge components.
+ *
+ * Therefore, regardless of the type of the Bridge component, link-local frames
+ * with these destination addresses will be trapped to CPU port:
+ *
+ * 01-80-C2-00-00-[00,01,02,03,0E]
+ *
+ * In a Bridge comprising a MAC Bridge component or a C-VLAN component:
+ *
+ * Link-local frames with these destination addresses won't be trapped to CPU
+ * port which won't conform to IEEE Std 802.1Q-2022:
+ *
+ * 01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F]
+ *
+ * In a Bridge comprising an S-VLAN component:
+ *
+ * Link-local frames with these destination addresses will be trapped to CPU
+ * port which won't conform to IEEE Std 802.1Q-2022:
+ *
+ * 01-80-C2-00-00-00
+ *
+ * Link-local frames with these destination addresses won't be trapped to CPU
+ * port which won't conform to IEEE Std 802.1Q-2022:
+ *
+ * 01-80-C2-00-00-[04,05,06,07,08,09,0A]
+ *
+ * To trap link-local frames to CPU port as conformant as this switch
+ * intellectual property can allow, link-local frames are made to be regarded as
+ * Bridge Protocol Data Units (BPDUs). This is because this switch intellectual
+ * property only lets the frames regarded as BPDUs bypass the spanning tree Port
+ * State function of the Forwarding Process.
+ *
+ * The only remaining interference is the ingress rules. When the reception Port
+ * has no PVID assigned on software, VLAN-untagged frames won't be allowed in.
+ * There doesn't seem to be a mechanism on the switch intellectual property to
+ * have link-local frames bypass this function of the Forwarding Process.
*/
static void
mt753x_trap_frames(struct mt7530_priv *priv)
@@ -1020,35 +1173,43 @@ mt753x_trap_frames(struct mt7530_priv *priv)
/* Trap 802.1X PAE frames and BPDUs to the CPU port(s) and egress them
* VLAN-untagged.
*/
- mt7530_rmw(priv, MT753X_BPC, MT753X_PAE_EG_TAG_MASK |
- MT753X_PAE_PORT_FW_MASK | MT753X_BPDU_EG_TAG_MASK |
- MT753X_BPDU_PORT_FW_MASK,
- MT753X_PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY) |
- MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_BPDU_CPU_ONLY);
+ mt7530_rmw(priv, MT753X_BPC,
+ MT753X_PAE_BPDU_FR | MT753X_PAE_EG_TAG_MASK |
+ MT753X_PAE_PORT_FW_MASK | MT753X_BPDU_EG_TAG_MASK |
+ MT753X_BPDU_PORT_FW_MASK,
+ MT753X_PAE_BPDU_FR |
+ MT753X_PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY) |
+ MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_BPDU_CPU_ONLY);

/* Trap frames with :01 and :02 MAC DAs to the CPU port(s) and egress
* them VLAN-untagged.
*/
- mt7530_rmw(priv, MT753X_RGAC1, MT753X_R02_EG_TAG_MASK |
- MT753X_R02_PORT_FW_MASK | MT753X_R01_EG_TAG_MASK |
- MT753X_R01_PORT_FW_MASK,
- MT753X_R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_R02_PORT_FW(MT753X_BPDU_CPU_ONLY) |
- MT753X_R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_BPDU_CPU_ONLY);
+ mt7530_rmw(priv, MT753X_RGAC1,
+ MT753X_R02_BPDU_FR | MT753X_R02_EG_TAG_MASK |
+ MT753X_R02_PORT_FW_MASK | MT753X_R01_BPDU_FR |
+ MT753X_R01_EG_TAG_MASK | MT753X_R01_PORT_FW_MASK,
+ MT753X_R02_BPDU_FR |
+ MT753X_R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_R02_PORT_FW(MT753X_BPDU_CPU_ONLY) |
+ MT753X_R01_BPDU_FR |
+ MT753X_R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_BPDU_CPU_ONLY);

/* Trap frames with :03 and :0E MAC DAs to the CPU port(s) and egress
* them VLAN-untagged.
*/
- mt7530_rmw(priv, MT753X_RGAC2, MT753X_R0E_EG_TAG_MASK |
- MT753X_R0E_PORT_FW_MASK | MT753X_R03_EG_TAG_MASK |
- MT753X_R03_PORT_FW_MASK,
- MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY) |
- MT753X_R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_BPDU_CPU_ONLY);
+ mt7530_rmw(priv, MT753X_RGAC2,
+ MT753X_R0E_BPDU_FR | MT753X_R0E_EG_TAG_MASK |
+ MT753X_R0E_PORT_FW_MASK | MT753X_R03_BPDU_FR |
+ MT753X_R03_EG_TAG_MASK | MT753X_R03_PORT_FW_MASK,
+ MT753X_R0E_BPDU_FR |
+ MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY) |
+ MT753X_R03_BPDU_FR |
+ MT753X_R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_BPDU_CPU_ONLY);
}

static int
diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h
index 75bc9043c8c0..ddefeb69afda 100644
--- a/drivers/net/dsa/mt7530.h
+++ b/drivers/net/dsa/mt7530.h
@@ -65,6 +65,7 @@ enum mt753x_id {

/* Registers for BPDU and PAE frame control*/
#define MT753X_BPC 0x24
+#define MT753X_PAE_BPDU_FR BIT(25)
#define MT753X_PAE_EG_TAG_MASK GENMASK(24, 22)
#define MT753X_PAE_EG_TAG(x) FIELD_PREP(MT753X_PAE_EG_TAG_MASK, x)
#define MT753X_PAE_PORT_FW_MASK GENMASK(18, 16)
@@ -75,20 +76,24 @@ enum mt753x_id {

/* Register for :01 and :02 MAC DA frame control */
#define MT753X_RGAC1 0x28
+#define MT753X_R02_BPDU_FR BIT(25)
#define MT753X_R02_EG_TAG_MASK GENMASK(24, 22)
#define MT753X_R02_EG_TAG(x) FIELD_PREP(MT753X_R02_EG_TAG_MASK, x)
#define MT753X_R02_PORT_FW_MASK GENMASK(18, 16)
#define MT753X_R02_PORT_FW(x) FIELD_PREP(MT753X_R02_PORT_FW_MASK, x)
+#define MT753X_R01_BPDU_FR BIT(9)
#define MT753X_R01_EG_TAG_MASK GENMASK(8, 6)
#define MT753X_R01_EG_TAG(x) FIELD_PREP(MT753X_R01_EG_TAG_MASK, x)
#define MT753X_R01_PORT_FW_MASK GENMASK(2, 0)

/* Register for :03 and :0E MAC DA frame control */
#define MT753X_RGAC2 0x2c
+#define MT753X_R0E_BPDU_FR BIT(25)
#define MT753X_R0E_EG_TAG_MASK GENMASK(24, 22)
#define MT753X_R0E_EG_TAG(x) FIELD_PREP(MT753X_R0E_EG_TAG_MASK, x)
#define MT753X_R0E_PORT_FW_MASK GENMASK(18, 16)
#define MT753X_R0E_PORT_FW(x) FIELD_PREP(MT753X_R0E_PORT_FW_MASK, x)
+#define MT753X_R03_BPDU_FR BIT(9)
#define MT753X_R03_EG_TAG_MASK GENMASK(8, 6)
#define MT753X_R03_EG_TAG(x) FIELD_PREP(MT753X_R03_EG_TAG_MASK, x)
#define MT753X_R03_PORT_FW_MASK GENMASK(2, 0)
diff --git a/drivers/net/ethernet/amazon/ena/Makefile b/drivers/net/ethernet/amazon/ena/Makefile
index f1f752a8f7bb..6ab615365172 100644
--- a/drivers/net/ethernet/amazon/ena/Makefile
+++ b/drivers/net/ethernet/amazon/ena/Makefile
@@ -5,4 +5,4 @@

obj-$(CONFIG_ENA_ETHERNET) += ena.o

-ena-y := ena_netdev.o ena_com.o ena_eth_com.o ena_ethtool.o
+ena-y := ena_netdev.o ena_com.o ena_eth_com.o ena_ethtool.o ena_xdp.o
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index 633b321d7fdd..4db689372980 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -362,7 +362,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
ENA_COM_BOUNCE_BUFFER_CNTRL_CNT;
io_sq->bounce_buf_ctrl.next_to_use = 0;

- size = io_sq->bounce_buf_ctrl.buffer_size *
+ size = (size_t)io_sq->bounce_buf_ctrl.buffer_size *
io_sq->bounce_buf_ctrl.buffers_num;

dev_node = dev_to_node(ena_dev->dmadev);
diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
index d671df4b76bc..d90187754444 100644
--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -7,6 +7,7 @@
#include <linux/pci.h>

#include "ena_netdev.h"
+#include "ena_xdp.h"

struct ena_stats {
char name[ETH_GSTRING_LEN];
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index cc39707a8059..b239e473d59f 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -19,8 +19,8 @@
#include <net/ip.h>

#include "ena_netdev.h"
-#include <linux/bpf_trace.h>
#include "ena_pci_id_tbl.h"
+#include "ena_xdp.h"

MODULE_AUTHOR("Amazon.com, Inc. or its affiliates");
MODULE_DESCRIPTION(DEVICE_NAME);
@@ -45,53 +45,6 @@ static void check_for_admin_com_state(struct ena_adapter *adapter);
static void ena_destroy_device(struct ena_adapter *adapter, bool graceful);
static int ena_restore_device(struct ena_adapter *adapter);

-static void ena_init_io_rings(struct ena_adapter *adapter,
- int first_index, int count);
-static void ena_init_napi_in_range(struct ena_adapter *adapter, int first_index,
- int count);
-static void ena_del_napi_in_range(struct ena_adapter *adapter, int first_index,
- int count);
-static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid);
-static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
- int first_index,
- int count);
-static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid);
-static void ena_free_tx_resources(struct ena_adapter *adapter, int qid);
-static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget);
-static void ena_destroy_all_tx_queues(struct ena_adapter *adapter);
-static void ena_free_all_io_tx_resources(struct ena_adapter *adapter);
-static void ena_napi_disable_in_range(struct ena_adapter *adapter,
- int first_index, int count);
-static void ena_napi_enable_in_range(struct ena_adapter *adapter,
- int first_index, int count);
-static int ena_up(struct ena_adapter *adapter);
-static void ena_down(struct ena_adapter *adapter);
-static void ena_unmask_interrupt(struct ena_ring *tx_ring,
- struct ena_ring *rx_ring);
-static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
- struct ena_ring *rx_ring);
-static void ena_unmap_tx_buff(struct ena_ring *tx_ring,
- struct ena_tx_buffer *tx_info);
-static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
- int first_index, int count);
-static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter,
- int first_index, int count);
-
-/* Increase a stat by cnt while holding syncp seqlock on 32bit machines */
-static void ena_increase_stat(u64 *statp, u64 cnt,
- struct u64_stats_sync *syncp)
-{
- u64_stats_update_begin(syncp);
- (*statp) += cnt;
- u64_stats_update_end(syncp);
-}
-
-static void ena_ring_tx_doorbell(struct ena_ring *tx_ring)
-{
- ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
- ena_increase_stat(&tx_ring->tx_stats.doorbells, 1, &tx_ring->syncp);
-}
-
static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct ena_adapter *adapter = netdev_priv(dev);
@@ -135,19 +88,18 @@ static int ena_change_mtu(struct net_device *dev, int new_mtu)
return ret;
}

-static int ena_xmit_common(struct net_device *dev,
- struct ena_ring *ring,
- struct ena_tx_buffer *tx_info,
- struct ena_com_tx_ctx *ena_tx_ctx,
- u16 next_to_use,
- u32 bytes)
+int ena_xmit_common(struct ena_adapter *adapter,
+ struct ena_ring *ring,
+ struct ena_tx_buffer *tx_info,
+ struct ena_com_tx_ctx *ena_tx_ctx,
+ u16 next_to_use,
+ u32 bytes)
{
- struct ena_adapter *adapter = netdev_priv(dev);
int rc, nb_hw_desc;

if (unlikely(ena_com_is_doorbell_needed(ring->ena_com_io_sq,
ena_tx_ctx))) {
- netif_dbg(adapter, tx_queued, dev,
+ netif_dbg(adapter, tx_queued, adapter->netdev,
"llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
ring->qid);
ena_ring_tx_doorbell(ring);
@@ -162,7 +114,7 @@ static int ena_xmit_common(struct net_device *dev,
* ena_com_prepare_tx() are fatal and therefore require a device reset.
*/
if (unlikely(rc)) {
- netif_err(adapter, tx_queued, dev,
+ netif_err(adapter, tx_queued, adapter->netdev,
"Failed to prepare tx bufs\n");
ena_increase_stat(&ring->tx_stats.prepare_ctx_err, 1,
&ring->syncp);
@@ -186,467 +138,6 @@ static int ena_xmit_common(struct net_device *dev,
return 0;
}

-/* This is the XDP napi callback. XDP queues use a separate napi callback
- * than Rx/Tx queues.
- */
-static int ena_xdp_io_poll(struct napi_struct *napi, int budget)
-{
- struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
- u32 xdp_work_done, xdp_budget;
- struct ena_ring *xdp_ring;
- int napi_comp_call = 0;
- int ret;
-
- xdp_ring = ena_napi->xdp_ring;
-
- xdp_budget = budget;
-
- if (!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags) ||
- test_bit(ENA_FLAG_TRIGGER_RESET, &xdp_ring->adapter->flags)) {
- napi_complete_done(napi, 0);
- return 0;
- }
-
- xdp_work_done = ena_clean_xdp_irq(xdp_ring, xdp_budget);
-
- /* If the device is about to reset or down, avoid unmask
- * the interrupt and return 0 so NAPI won't reschedule
- */
- if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags))) {
- napi_complete_done(napi, 0);
- ret = 0;
- } else if (xdp_budget > xdp_work_done) {
- napi_comp_call = 1;
- if (napi_complete_done(napi, xdp_work_done))
- ena_unmask_interrupt(xdp_ring, NULL);
- ena_update_ring_numa_node(xdp_ring, NULL);
- ret = xdp_work_done;
- } else {
- ret = xdp_budget;
- }
-
- u64_stats_update_begin(&xdp_ring->syncp);
- xdp_ring->tx_stats.napi_comp += napi_comp_call;
- xdp_ring->tx_stats.tx_poll++;
- u64_stats_update_end(&xdp_ring->syncp);
- xdp_ring->tx_stats.last_napi_jiffies = jiffies;
-
- return ret;
-}
-
-static int ena_xdp_tx_map_frame(struct ena_ring *xdp_ring,
- struct ena_tx_buffer *tx_info,
- struct xdp_frame *xdpf,
- struct ena_com_tx_ctx *ena_tx_ctx)
-{
- struct ena_adapter *adapter = xdp_ring->adapter;
- struct ena_com_buf *ena_buf;
- int push_len = 0;
- dma_addr_t dma;
- void *data;
- u32 size;
-
- tx_info->xdpf = xdpf;
- data = tx_info->xdpf->data;
- size = tx_info->xdpf->len;
-
- if (xdp_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
- /* Designate part of the packet for LLQ */
- push_len = min_t(u32, size, xdp_ring->tx_max_header_size);
-
- ena_tx_ctx->push_header = data;
-
- size -= push_len;
- data += push_len;
- }
-
- ena_tx_ctx->header_len = push_len;
-
- if (size > 0) {
- dma = dma_map_single(xdp_ring->dev,
- data,
- size,
- DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(xdp_ring->dev, dma)))
- goto error_report_dma_error;
-
- tx_info->map_linear_data = 0;
-
- ena_buf = tx_info->bufs;
- ena_buf->paddr = dma;
- ena_buf->len = size;
-
- ena_tx_ctx->ena_bufs = ena_buf;
- ena_tx_ctx->num_bufs = tx_info->num_of_bufs = 1;
- }
-
- return 0;
-
-error_report_dma_error:
- ena_increase_stat(&xdp_ring->tx_stats.dma_mapping_err, 1,
- &xdp_ring->syncp);
- netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map xdp buff\n");
-
- return -EINVAL;
-}
-
-static int ena_xdp_xmit_frame(struct ena_ring *xdp_ring,
- struct net_device *dev,
- struct xdp_frame *xdpf,
- int flags)
-{
- struct ena_com_tx_ctx ena_tx_ctx = {};
- struct ena_tx_buffer *tx_info;
- u16 next_to_use, req_id;
- int rc;
-
- next_to_use = xdp_ring->next_to_use;
- req_id = xdp_ring->free_ids[next_to_use];
- tx_info = &xdp_ring->tx_buffer_info[req_id];
- tx_info->num_of_bufs = 0;
-
- rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &ena_tx_ctx);
- if (unlikely(rc))
- return rc;
-
- ena_tx_ctx.req_id = req_id;
-
- rc = ena_xmit_common(dev,
- xdp_ring,
- tx_info,
- &ena_tx_ctx,
- next_to_use,
- xdpf->len);
- if (rc)
- goto error_unmap_dma;
-
- /* trigger the dma engine. ena_ring_tx_doorbell()
- * calls a memory barrier inside it.
- */
- if (flags & XDP_XMIT_FLUSH)
- ena_ring_tx_doorbell(xdp_ring);
-
- return rc;
-
-error_unmap_dma:
- ena_unmap_tx_buff(xdp_ring, tx_info);
- tx_info->xdpf = NULL;
- return rc;
-}
-
-static int ena_xdp_xmit(struct net_device *dev, int n,
- struct xdp_frame **frames, u32 flags)
-{
- struct ena_adapter *adapter = netdev_priv(dev);
- struct ena_ring *xdp_ring;
- int qid, i, nxmit = 0;
-
- if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
- return -EINVAL;
-
- if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
- return -ENETDOWN;
-
- /* We assume that all rings have the same XDP program */
- if (!READ_ONCE(adapter->rx_ring->xdp_bpf_prog))
- return -ENXIO;
-
- qid = smp_processor_id() % adapter->xdp_num_queues;
- qid += adapter->xdp_first_ring;
- xdp_ring = &adapter->tx_ring[qid];
-
- /* Other CPU ids might try to send thorugh this queue */
- spin_lock(&xdp_ring->xdp_tx_lock);
-
- for (i = 0; i < n; i++) {
- if (ena_xdp_xmit_frame(xdp_ring, dev, frames[i], 0))
- break;
- nxmit++;
- }
-
- /* Ring doorbell to make device aware of the packets */
- if (flags & XDP_XMIT_FLUSH)
- ena_ring_tx_doorbell(xdp_ring);
-
- spin_unlock(&xdp_ring->xdp_tx_lock);
-
- /* Return number of packets sent */
- return nxmit;
-}
-
-static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
-{
- u32 verdict = ENA_XDP_PASS;
- struct bpf_prog *xdp_prog;
- struct ena_ring *xdp_ring;
- struct xdp_frame *xdpf;
- u64 *xdp_stat;
-
- xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog);
-
- if (!xdp_prog)
- goto out;
-
- verdict = bpf_prog_run_xdp(xdp_prog, xdp);
-
- switch (verdict) {
- case XDP_TX:
- xdpf = xdp_convert_buff_to_frame(xdp);
- if (unlikely(!xdpf)) {
- trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
- xdp_stat = &rx_ring->rx_stats.xdp_aborted;
- verdict = ENA_XDP_DROP;
- break;
- }
-
- /* Find xmit queue */
- xdp_ring = rx_ring->xdp_ring;
-
- /* The XDP queues are shared between XDP_TX and XDP_REDIRECT */
- spin_lock(&xdp_ring->xdp_tx_lock);
-
- if (ena_xdp_xmit_frame(xdp_ring, rx_ring->netdev, xdpf,
- XDP_XMIT_FLUSH))
- xdp_return_frame(xdpf);
-
- spin_unlock(&xdp_ring->xdp_tx_lock);
- xdp_stat = &rx_ring->rx_stats.xdp_tx;
- verdict = ENA_XDP_TX;
- break;
- case XDP_REDIRECT:
- if (likely(!xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog))) {
- xdp_stat = &rx_ring->rx_stats.xdp_redirect;
- verdict = ENA_XDP_REDIRECT;
- break;
- }
- trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
- xdp_stat = &rx_ring->rx_stats.xdp_aborted;
- verdict = ENA_XDP_DROP;
- break;
- case XDP_ABORTED:
- trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
- xdp_stat = &rx_ring->rx_stats.xdp_aborted;
- verdict = ENA_XDP_DROP;
- break;
- case XDP_DROP:
- xdp_stat = &rx_ring->rx_stats.xdp_drop;
- verdict = ENA_XDP_DROP;
- break;
- case XDP_PASS:
- xdp_stat = &rx_ring->rx_stats.xdp_pass;
- verdict = ENA_XDP_PASS;
- break;
- default:
- bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, verdict);
- xdp_stat = &rx_ring->rx_stats.xdp_invalid;
- verdict = ENA_XDP_DROP;
- }
-
- ena_increase_stat(xdp_stat, 1, &rx_ring->syncp);
-out:
- return verdict;
-}
-
-static void ena_init_all_xdp_queues(struct ena_adapter *adapter)
-{
- adapter->xdp_first_ring = adapter->num_io_queues;
- adapter->xdp_num_queues = adapter->num_io_queues;
-
- ena_init_io_rings(adapter,
- adapter->xdp_first_ring,
- adapter->xdp_num_queues);
-}
-
-static int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter)
-{
- u32 xdp_first_ring = adapter->xdp_first_ring;
- u32 xdp_num_queues = adapter->xdp_num_queues;
- int rc = 0;
-
- rc = ena_setup_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues);
- if (rc)
- goto setup_err;
-
- rc = ena_create_io_tx_queues_in_range(adapter, xdp_first_ring, xdp_num_queues);
- if (rc)
- goto create_err;
-
- return 0;
-
-create_err:
- ena_free_all_io_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues);
-setup_err:
- return rc;
-}
-
-/* Provides a way for both kernel and bpf-prog to know
- * more about the RX-queue a given XDP frame arrived on.
- */
-static int ena_xdp_register_rxq_info(struct ena_ring *rx_ring)
-{
- int rc;
-
- rc = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, rx_ring->qid, 0);
-
- if (rc) {
- netif_err(rx_ring->adapter, ifup, rx_ring->netdev,
- "Failed to register xdp rx queue info. RX queue num %d rc: %d\n",
- rx_ring->qid, rc);
- goto err;
- }
-
- rc = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, MEM_TYPE_PAGE_SHARED,
- NULL);
-
- if (rc) {
- netif_err(rx_ring->adapter, ifup, rx_ring->netdev,
- "Failed to register xdp rx queue info memory model. RX queue num %d rc: %d\n",
- rx_ring->qid, rc);
- xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
- }
-
-err:
- return rc;
-}
-
-static void ena_xdp_unregister_rxq_info(struct ena_ring *rx_ring)
-{
- xdp_rxq_info_unreg_mem_model(&rx_ring->xdp_rxq);
- xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
-}
-
-static void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter,
- struct bpf_prog *prog,
- int first, int count)
-{
- struct bpf_prog *old_bpf_prog;
- struct ena_ring *rx_ring;
- int i = 0;
-
- for (i = first; i < count; i++) {
- rx_ring = &adapter->rx_ring[i];
- old_bpf_prog = xchg(&rx_ring->xdp_bpf_prog, prog);
-
- if (!old_bpf_prog && prog) {
- ena_xdp_register_rxq_info(rx_ring);
- rx_ring->rx_headroom = XDP_PACKET_HEADROOM;
- } else if (old_bpf_prog && !prog) {
- ena_xdp_unregister_rxq_info(rx_ring);
- rx_ring->rx_headroom = NET_SKB_PAD;
- }
- }
-}
-
-static void ena_xdp_exchange_program(struct ena_adapter *adapter,
- struct bpf_prog *prog)
-{
- struct bpf_prog *old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog);
-
- ena_xdp_exchange_program_rx_in_range(adapter,
- prog,
- 0,
- adapter->num_io_queues);
-
- if (old_bpf_prog)
- bpf_prog_put(old_bpf_prog);
-}
-
-static int ena_destroy_and_free_all_xdp_queues(struct ena_adapter *adapter)
-{
- bool was_up;
- int rc;
-
- was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
-
- if (was_up)
- ena_down(adapter);
-
- adapter->xdp_first_ring = 0;
- adapter->xdp_num_queues = 0;
- ena_xdp_exchange_program(adapter, NULL);
- if (was_up) {
- rc = ena_up(adapter);
- if (rc)
- return rc;
- }
- return 0;
-}
-
-static int ena_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf)
-{
- struct ena_adapter *adapter = netdev_priv(netdev);
- struct bpf_prog *prog = bpf->prog;
- struct bpf_prog *old_bpf_prog;
- int rc, prev_mtu;
- bool is_up;
-
- is_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
- rc = ena_xdp_allowed(adapter);
- if (rc == ENA_XDP_ALLOWED) {
- old_bpf_prog = adapter->xdp_bpf_prog;
- if (prog) {
- if (!is_up) {
- ena_init_all_xdp_queues(adapter);
- } else if (!old_bpf_prog) {
- ena_down(adapter);
- ena_init_all_xdp_queues(adapter);
- }
- ena_xdp_exchange_program(adapter, prog);
-
- if (is_up && !old_bpf_prog) {
- rc = ena_up(adapter);
- if (rc)
- return rc;
- }
- xdp_features_set_redirect_target(netdev, false);
- } else if (old_bpf_prog) {
- xdp_features_clear_redirect_target(netdev);
- rc = ena_destroy_and_free_all_xdp_queues(adapter);
- if (rc)
- return rc;
- }
-
- prev_mtu = netdev->max_mtu;
- netdev->max_mtu = prog ? ENA_XDP_MAX_MTU : adapter->max_mtu;
-
- if (!old_bpf_prog)
- netif_info(adapter, drv, adapter->netdev,
- "XDP program is set, changing the max_mtu from %d to %d",
- prev_mtu, netdev->max_mtu);
-
- } else if (rc == ENA_XDP_CURRENT_MTU_TOO_LARGE) {
- netif_err(adapter, drv, adapter->netdev,
- "Failed to set xdp program, the current MTU (%d) is larger than the maximum allowed MTU (%lu) while xdp is on",
- netdev->mtu, ENA_XDP_MAX_MTU);
- NL_SET_ERR_MSG_MOD(bpf->extack,
- "Failed to set xdp program, the current MTU is larger than the maximum allowed MTU. Check the dmesg for more info");
- return -EINVAL;
- } else if (rc == ENA_XDP_NO_ENOUGH_QUEUES) {
- netif_err(adapter, drv, adapter->netdev,
- "Failed to set xdp program, the Rx/Tx channel count should be at most half of the maximum allowed channel count. The current queue count (%d), the maximal queue count (%d)\n",
- adapter->num_io_queues, adapter->max_num_io_queues);
- NL_SET_ERR_MSG_MOD(bpf->extack,
- "Failed to set xdp program, there is no enough space for allocating XDP queues, Check the dmesg for more info");
- return -EINVAL;
- }
-
- return 0;
-}
-
-/* This is the main xdp callback, it's used by the kernel to set/unset the xdp
- * program as well as to query the current xdp program id.
- */
-static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf)
-{
- switch (bpf->command) {
- case XDP_SETUP_PROG:
- return ena_xdp_set(netdev, bpf);
- default:
- return -EINVAL;
- }
- return 0;
-}
-
static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter)
{
#ifdef CONFIG_RFS_ACCEL
@@ -688,8 +179,8 @@ static void ena_init_io_rings_common(struct ena_adapter *adapter,
u64_stats_init(&ring->syncp);
}

-static void ena_init_io_rings(struct ena_adapter *adapter,
- int first_index, int count)
+void ena_init_io_rings(struct ena_adapter *adapter,
+ int first_index, int count)
{
struct ena_com_dev *ena_dev;
struct ena_ring *txr, *rxr;
@@ -820,9 +311,8 @@ static void ena_free_tx_resources(struct ena_adapter *adapter, int qid)
tx_ring->push_buf_intermediate_buf = NULL;
}

-static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
- int first_index,
- int count)
+int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
+ int first_index, int count)
{
int i, rc = 0;

@@ -845,8 +335,8 @@ static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
return rc;
}

-static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter,
- int first_index, int count)
+void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter,
+ int first_index, int count)
{
int i;

@@ -859,7 +349,7 @@ static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter,
*
* Free all transmit software resources
*/
-static void ena_free_all_io_tx_resources(struct ena_adapter *adapter)
+void ena_free_all_io_tx_resources(struct ena_adapter *adapter)
{
ena_free_all_io_tx_resources_in_range(adapter,
0,
@@ -1169,8 +659,8 @@ static void ena_free_all_rx_bufs(struct ena_adapter *adapter)
ena_free_rx_bufs(adapter, i);
}

-static void ena_unmap_tx_buff(struct ena_ring *tx_ring,
- struct ena_tx_buffer *tx_info)
+void ena_unmap_tx_buff(struct ena_ring *tx_ring,
+ struct ena_tx_buffer *tx_info)
{
struct ena_com_buf *ena_buf;
u32 cnt;
@@ -1205,8 +695,11 @@ static void ena_unmap_tx_buff(struct ena_ring *tx_ring,
static void ena_free_tx_bufs(struct ena_ring *tx_ring)
{
bool print_once = true;
+ bool is_xdp_ring;
u32 i;

+ is_xdp_ring = ENA_IS_XDP_INDEX(tx_ring->adapter, tx_ring->qid);
+
for (i = 0; i < tx_ring->ring_size; i++) {
struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];

@@ -1226,10 +719,15 @@ static void ena_free_tx_bufs(struct ena_ring *tx_ring)

ena_unmap_tx_buff(tx_ring, tx_info);

- dev_kfree_skb_any(tx_info->skb);
+ if (is_xdp_ring)
+ xdp_return_frame(tx_info->xdpf);
+ else
+ dev_kfree_skb_any(tx_info->skb);
}
- netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
- tx_ring->qid));
+
+ if (!is_xdp_ring)
+ netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
+ tx_ring->qid));
}

static void ena_free_all_tx_bufs(struct ena_adapter *adapter)
@@ -1272,8 +770,8 @@ static void ena_destroy_all_io_queues(struct ena_adapter *adapter)
ena_destroy_all_rx_queues(adapter);
}

-static int handle_invalid_req_id(struct ena_ring *ring, u16 req_id,
- struct ena_tx_buffer *tx_info, bool is_xdp)
+int handle_invalid_req_id(struct ena_ring *ring, u16 req_id,
+ struct ena_tx_buffer *tx_info, bool is_xdp)
{
if (tx_info)
netif_err(ring->adapter,
@@ -1305,17 +803,6 @@ static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id)
return handle_invalid_req_id(tx_ring, req_id, tx_info, false);
}

-static int validate_xdp_req_id(struct ena_ring *xdp_ring, u16 req_id)
-{
- struct ena_tx_buffer *tx_info;
-
- tx_info = &xdp_ring->tx_buffer_info[req_id];
- if (likely(tx_info->xdpf))
- return 0;
-
- return handle_invalid_req_id(xdp_ring, req_id, tx_info, true);
-}
-
static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget)
{
struct netdev_queue *txq;
@@ -1688,6 +1175,7 @@ static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp, u

return ret;
}
+
/* ena_clean_rx_irq - Cleanup RX irq
* @rx_ring: RX ring to clean
* @napi: napi handler
@@ -1880,8 +1368,8 @@ static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi)
rx_ring->per_napi_packets = 0;
}

-static void ena_unmask_interrupt(struct ena_ring *tx_ring,
- struct ena_ring *rx_ring)
+void ena_unmask_interrupt(struct ena_ring *tx_ring,
+ struct ena_ring *rx_ring)
{
u32 rx_interval = tx_ring->smoothed_interval;
struct ena_eth_io_intr_reg intr_reg;
@@ -1913,8 +1401,8 @@ static void ena_unmask_interrupt(struct ena_ring *tx_ring,
ena_com_unmask_intr(tx_ring->ena_com_io_cq, &intr_reg);
}

-static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
- struct ena_ring *rx_ring)
+void ena_update_ring_numa_node(struct ena_ring *tx_ring,
+ struct ena_ring *rx_ring)
{
int cpu = get_cpu();
int numa_node;
@@ -1949,67 +1437,6 @@ static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
put_cpu();
}

-static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget)
-{
- u32 total_done = 0;
- u16 next_to_clean;
- int tx_pkts = 0;
- u16 req_id;
- int rc;
-
- if (unlikely(!xdp_ring))
- return 0;
- next_to_clean = xdp_ring->next_to_clean;
-
- while (tx_pkts < budget) {
- struct ena_tx_buffer *tx_info;
- struct xdp_frame *xdpf;
-
- rc = ena_com_tx_comp_req_id_get(xdp_ring->ena_com_io_cq,
- &req_id);
- if (rc) {
- if (unlikely(rc == -EINVAL))
- handle_invalid_req_id(xdp_ring, req_id, NULL,
- true);
- break;
- }
-
- /* validate that the request id points to a valid xdp_frame */
- rc = validate_xdp_req_id(xdp_ring, req_id);
- if (rc)
- break;
-
- tx_info = &xdp_ring->tx_buffer_info[req_id];
- xdpf = tx_info->xdpf;
-
- tx_info->xdpf = NULL;
- tx_info->last_jiffies = 0;
- ena_unmap_tx_buff(xdp_ring, tx_info);
-
- netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev,
- "tx_poll: q %d skb %p completed\n", xdp_ring->qid,
- xdpf);
-
- tx_pkts++;
- total_done += tx_info->tx_descs;
-
- xdp_return_frame(xdpf);
- xdp_ring->free_ids[next_to_clean] = req_id;
- next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
- xdp_ring->ring_size);
- }
-
- xdp_ring->next_to_clean = next_to_clean;
- ena_com_comp_ack(xdp_ring->ena_com_io_sq, total_done);
- ena_com_update_dev_comp_head(xdp_ring->ena_com_io_cq);
-
- netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev,
- "tx_poll: q %d done. total pkts: %d\n",
- xdp_ring->qid, tx_pkts);
-
- return tx_pkts;
-}
-
static int ena_io_poll(struct napi_struct *napi, int budget)
{
struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
@@ -2326,8 +1753,8 @@ static void ena_del_napi_in_range(struct ena_adapter *adapter,
for (i = first_index; i < first_index + count; i++) {
netif_napi_del(&adapter->ena_napi[i].napi);

- WARN_ON(!ENA_IS_XDP_INDEX(adapter, i) &&
- adapter->ena_napi[i].xdp_ring);
+ WARN_ON(ENA_IS_XDP_INDEX(adapter, i) &&
+ adapter->ena_napi[i].rx_ring);
}
}

@@ -2342,12 +1769,10 @@ static void ena_init_napi_in_range(struct ena_adapter *adapter,
netif_napi_add(adapter->netdev, &napi->napi,
ENA_IS_XDP_INDEX(adapter, i) ? ena_xdp_io_poll : ena_io_poll);

- if (!ENA_IS_XDP_INDEX(adapter, i)) {
+ if (!ENA_IS_XDP_INDEX(adapter, i))
napi->rx_ring = &adapter->rx_ring[i];
- napi->tx_ring = &adapter->tx_ring[i];
- } else {
- napi->xdp_ring = &adapter->tx_ring[i];
- }
+
+ napi->tx_ring = &adapter->tx_ring[i];
napi->qid = i;
}
}
@@ -2475,8 +1900,8 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
return rc;
}

-static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
- int first_index, int count)
+int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
+ int first_index, int count)
{
struct ena_com_dev *ena_dev = adapter->ena_dev;
int rc, i;
@@ -2686,7 +2111,7 @@ static int create_queues_with_size_backoff(struct ena_adapter *adapter)
}
}

-static int ena_up(struct ena_adapter *adapter)
+int ena_up(struct ena_adapter *adapter)
{
int io_queue_count, rc, i;

@@ -2748,7 +2173,7 @@ static int ena_up(struct ena_adapter *adapter)
return rc;
}

-static void ena_down(struct ena_adapter *adapter)
+void ena_down(struct ena_adapter *adapter)
{
int io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;

@@ -3179,7 +2604,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
/* set flags and meta data */
ena_tx_csum(&ena_tx_ctx, skb, tx_ring->disable_meta_caching);

- rc = ena_xmit_common(dev,
+ rc = ena_xmit_common(adapter,
tx_ring,
tx_info,
&ena_tx_ctx,
@@ -3982,10 +3407,11 @@ static void check_for_missing_completions(struct ena_adapter *adapter)
{
struct ena_ring *tx_ring;
struct ena_ring *rx_ring;
- int i, budget, rc;
+ int qid, budget, rc;
int io_queue_count;

io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues;
+
/* Make sure the driver doesn't turn the device in other process */
smp_rmb();

@@ -3998,27 +3424,29 @@ static void check_for_missing_completions(struct ena_adapter *adapter)
if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT)
return;

- budget = ENA_MONITORED_TX_QUEUES;
+ budget = min_t(u32, io_queue_count, ENA_MONITORED_TX_QUEUES);

- for (i = adapter->last_monitored_tx_qid; i < io_queue_count; i++) {
- tx_ring = &adapter->tx_ring[i];
- rx_ring = &adapter->rx_ring[i];
+ qid = adapter->last_monitored_tx_qid;
+
+ while (budget) {
+ qid = (qid + 1) % io_queue_count;
+
+ tx_ring = &adapter->tx_ring[qid];
+ rx_ring = &adapter->rx_ring[qid];

rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
if (unlikely(rc))
return;

- rc = !ENA_IS_XDP_INDEX(adapter, i) ?
+ rc = !ENA_IS_XDP_INDEX(adapter, qid) ?
check_for_rx_interrupt_queue(adapter, rx_ring) : 0;
if (unlikely(rc))
return;

budget--;
- if (!budget)
- break;
}

- adapter->last_monitored_tx_qid = i % io_queue_count;
+ adapter->last_monitored_tx_qid = qid;
}

/* trigger napi schedule after 2 consecutive detections */
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index 33c923e1261a..b364febab011 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -110,19 +110,6 @@

#define ENA_MMIO_DISABLE_REG_READ BIT(0)

-/* The max MTU size is configured to be the ethernet frame size without
- * the overhead of the ethernet header, which can have a VLAN header, and
- * a frame check sequence (FCS).
- * The buffer size we share with the device is defined to be ENA_PAGE_SIZE
- */
-
-#define ENA_XDP_MAX_MTU (ENA_PAGE_SIZE - ETH_HLEN - ETH_FCS_LEN - \
- VLAN_HLEN - XDP_PACKET_HEADROOM - \
- SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
-
-#define ENA_IS_XDP_INDEX(adapter, index) (((index) >= (adapter)->xdp_first_ring) && \
- ((index) < (adapter)->xdp_first_ring + (adapter)->xdp_num_queues))
-
struct ena_irq {
irq_handler_t handler;
void *data;
@@ -138,7 +125,6 @@ struct ena_napi {
struct napi_struct napi;
struct ena_ring *tx_ring;
struct ena_ring *rx_ring;
- struct ena_ring *xdp_ring;
u32 qid;
struct dim dim;
};
@@ -421,47 +407,44 @@ static inline void ena_reset_device(struct ena_adapter *adapter,
set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
}

-enum ena_xdp_errors_t {
- ENA_XDP_ALLOWED = 0,
- ENA_XDP_CURRENT_MTU_TOO_LARGE,
- ENA_XDP_NO_ENOUGH_QUEUES,
-};
-
-enum ENA_XDP_ACTIONS {
- ENA_XDP_PASS = 0,
- ENA_XDP_TX = BIT(0),
- ENA_XDP_REDIRECT = BIT(1),
- ENA_XDP_DROP = BIT(2)
-};
-
-#define ENA_XDP_FORWARDED (ENA_XDP_TX | ENA_XDP_REDIRECT)
+int handle_invalid_req_id(struct ena_ring *ring, u16 req_id,
+ struct ena_tx_buffer *tx_info, bool is_xdp);

-static inline bool ena_xdp_present(struct ena_adapter *adapter)
+/* Increase a stat by cnt while holding syncp seqlock on 32bit machines */
+static inline void ena_increase_stat(u64 *statp, u64 cnt,
+ struct u64_stats_sync *syncp)
{
- return !!adapter->xdp_bpf_prog;
+ u64_stats_update_begin(syncp);
+ (*statp) += cnt;
+ u64_stats_update_end(syncp);
}

-static inline bool ena_xdp_present_ring(struct ena_ring *ring)
+static inline void ena_ring_tx_doorbell(struct ena_ring *tx_ring)
{
- return !!ring->xdp_bpf_prog;
-}
-
-static inline bool ena_xdp_legal_queue_count(struct ena_adapter *adapter,
- u32 queues)
-{
- return 2 * queues <= adapter->max_num_io_queues;
-}
-
-static inline enum ena_xdp_errors_t ena_xdp_allowed(struct ena_adapter *adapter)
-{
- enum ena_xdp_errors_t rc = ENA_XDP_ALLOWED;
-
- if (adapter->netdev->mtu > ENA_XDP_MAX_MTU)
- rc = ENA_XDP_CURRENT_MTU_TOO_LARGE;
- else if (!ena_xdp_legal_queue_count(adapter, adapter->num_io_queues))
- rc = ENA_XDP_NO_ENOUGH_QUEUES;
-
- return rc;
+ ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
+ ena_increase_stat(&tx_ring->tx_stats.doorbells, 1, &tx_ring->syncp);
}

+int ena_xmit_common(struct ena_adapter *adapter,
+ struct ena_ring *ring,
+ struct ena_tx_buffer *tx_info,
+ struct ena_com_tx_ctx *ena_tx_ctx,
+ u16 next_to_use,
+ u32 bytes);
+void ena_unmap_tx_buff(struct ena_ring *tx_ring,
+ struct ena_tx_buffer *tx_info);
+void ena_init_io_rings(struct ena_adapter *adapter,
+ int first_index, int count);
+int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
+ int first_index, int count);
+int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
+ int first_index, int count);
+void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter,
+ int first_index, int count);
+void ena_free_all_io_tx_resources(struct ena_adapter *adapter);
+void ena_down(struct ena_adapter *adapter);
+int ena_up(struct ena_adapter *adapter);
+void ena_unmask_interrupt(struct ena_ring *tx_ring, struct ena_ring *rx_ring);
+void ena_update_ring_numa_node(struct ena_ring *tx_ring,
+ struct ena_ring *rx_ring);
#endif /* !(ENA_H) */
diff --git a/drivers/net/ethernet/amazon/ena/ena_xdp.c b/drivers/net/ethernet/amazon/ena/ena_xdp.c
new file mode 100644
index 000000000000..25de2f511649
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_xdp.c
@@ -0,0 +1,466 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright 2015-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#include "ena_xdp.h"
+
+static int validate_xdp_req_id(struct ena_ring *tx_ring, u16 req_id)
+{
+ struct ena_tx_buffer *tx_info;
+
+ tx_info = &tx_ring->tx_buffer_info[req_id];
+ if (likely(tx_info->xdpf))
+ return 0;
+
+ return handle_invalid_req_id(tx_ring, req_id, tx_info, true);
+}
+
+static int ena_xdp_tx_map_frame(struct ena_ring *tx_ring,
+ struct ena_tx_buffer *tx_info,
+ struct xdp_frame *xdpf,
+ struct ena_com_tx_ctx *ena_tx_ctx)
+{
+ struct ena_adapter *adapter = tx_ring->adapter;
+ struct ena_com_buf *ena_buf;
+ int push_len = 0;
+ dma_addr_t dma;
+ void *data;
+ u32 size;
+
+ tx_info->xdpf = xdpf;
+ data = tx_info->xdpf->data;
+ size = tx_info->xdpf->len;
+
+ if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
+ /* Designate part of the packet for LLQ */
+ push_len = min_t(u32, size, tx_ring->tx_max_header_size);
+
+ ena_tx_ctx->push_header = data;
+
+ size -= push_len;
+ data += push_len;
+ }
+
+ ena_tx_ctx->header_len = push_len;
+
+ if (size > 0) {
+ dma = dma_map_single(tx_ring->dev,
+ data,
+ size,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
+ goto error_report_dma_error;
+
+ tx_info->map_linear_data = 0;
+
+ ena_buf = tx_info->bufs;
+ ena_buf->paddr = dma;
+ ena_buf->len = size;
+
+ ena_tx_ctx->ena_bufs = ena_buf;
+ ena_tx_ctx->num_bufs = tx_info->num_of_bufs = 1;
+ }
+
+ return 0;
+
+error_report_dma_error:
+ ena_increase_stat(&tx_ring->tx_stats.dma_mapping_err, 1,
+ &tx_ring->syncp);
+ netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map xdp buff\n");
+
+ return -EINVAL;
+}
+
+int ena_xdp_xmit_frame(struct ena_ring *tx_ring,
+ struct ena_adapter *adapter,
+ struct xdp_frame *xdpf,
+ int flags)
+{
+ struct ena_com_tx_ctx ena_tx_ctx = {};
+ struct ena_tx_buffer *tx_info;
+ u16 next_to_use, req_id;
+ int rc;
+
+ next_to_use = tx_ring->next_to_use;
+ req_id = tx_ring->free_ids[next_to_use];
+ tx_info = &tx_ring->tx_buffer_info[req_id];
+ tx_info->num_of_bufs = 0;
+
+ rc = ena_xdp_tx_map_frame(tx_ring, tx_info, xdpf, &ena_tx_ctx);
+ if (unlikely(rc))
+ goto err;
+
+ ena_tx_ctx.req_id = req_id;
+
+ rc = ena_xmit_common(adapter,
+ tx_ring,
+ tx_info,
+ &ena_tx_ctx,
+ next_to_use,
+ xdpf->len);
+ if (rc)
+ goto error_unmap_dma;
+
+ /* trigger the dma engine. ena_ring_tx_doorbell()
+ * calls a memory barrier inside it.
+ */
+ if (flags & XDP_XMIT_FLUSH)
+ ena_ring_tx_doorbell(tx_ring);
+
+ return rc;
+
+error_unmap_dma:
+ ena_unmap_tx_buff(tx_ring, tx_info);
+err:
+ tx_info->xdpf = NULL;
+
+ return rc;
+}
+
+int ena_xdp_xmit(struct net_device *dev, int n,
+ struct xdp_frame **frames, u32 flags)
+{
+ struct ena_adapter *adapter = netdev_priv(dev);
+ struct ena_ring *tx_ring;
+ int qid, i, nxmit = 0;
+
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
+ if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
+ return -ENETDOWN;
+
+ /* We assume that all rings have the same XDP program */
+ if (!READ_ONCE(adapter->rx_ring->xdp_bpf_prog))
+ return -ENXIO;
+
+ qid = smp_processor_id() % adapter->xdp_num_queues;
+ qid += adapter->xdp_first_ring;
+ tx_ring = &adapter->tx_ring[qid];
+
+ /* Other CPU ids might try to send thorugh this queue */
+ spin_lock(&tx_ring->xdp_tx_lock);
+
+ for (i = 0; i < n; i++) {
+ if (ena_xdp_xmit_frame(tx_ring, adapter, frames[i], 0))
+ break;
+ nxmit++;
+ }
+
+ /* Ring doorbell to make device aware of the packets */
+ if (flags & XDP_XMIT_FLUSH)
+ ena_ring_tx_doorbell(tx_ring);
+
+ spin_unlock(&tx_ring->xdp_tx_lock);
+
+ /* Return number of packets sent */
+ return nxmit;
+}
+
+static void ena_init_all_xdp_queues(struct ena_adapter *adapter)
+{
+ adapter->xdp_first_ring = adapter->num_io_queues;
+ adapter->xdp_num_queues = adapter->num_io_queues;
+
+ ena_init_io_rings(adapter,
+ adapter->xdp_first_ring,
+ adapter->xdp_num_queues);
+}
+
+int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter)
+{
+ u32 xdp_first_ring = adapter->xdp_first_ring;
+ u32 xdp_num_queues = adapter->xdp_num_queues;
+ int rc = 0;
+
+ rc = ena_setup_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues);
+ if (rc)
+ goto setup_err;
+
+ rc = ena_create_io_tx_queues_in_range(adapter, xdp_first_ring, xdp_num_queues);
+ if (rc)
+ goto create_err;
+
+ return 0;
+
+create_err:
+ ena_free_all_io_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues);
+setup_err:
+ return rc;
+}
+
+/* Provides a way for both kernel and bpf-prog to know
+ * more about the RX-queue a given XDP frame arrived on.
+ */
+static int ena_xdp_register_rxq_info(struct ena_ring *rx_ring)
+{
+ int rc;
+
+ rc = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, rx_ring->qid, 0);
+
+ if (rc) {
+ netif_err(rx_ring->adapter, ifup, rx_ring->netdev,
+ "Failed to register xdp rx queue info. RX queue num %d rc: %d\n",
+ rx_ring->qid, rc);
+ goto err;
+ }
+
+ rc = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, MEM_TYPE_PAGE_SHARED, NULL);
+
+ if (rc) {
+ netif_err(rx_ring->adapter, ifup, rx_ring->netdev,
+ "Failed to register xdp rx queue info memory model. RX queue num %d rc: %d\n",
+ rx_ring->qid, rc);
+ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+ }
+
+err:
+ return rc;
+}
+
+static void ena_xdp_unregister_rxq_info(struct ena_ring *rx_ring)
+{
+ xdp_rxq_info_unreg_mem_model(&rx_ring->xdp_rxq);
+ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+}
+
+void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter,
+ struct bpf_prog *prog,
+ int first, int count)
+{
+ struct bpf_prog *old_bpf_prog;
+ struct ena_ring *rx_ring;
+ int i = 0;
+
+ for (i = first; i < count; i++) {
+ rx_ring = &adapter->rx_ring[i];
+ old_bpf_prog = xchg(&rx_ring->xdp_bpf_prog, prog);
+
+ if (!old_bpf_prog && prog) {
+ ena_xdp_register_rxq_info(rx_ring);
+ rx_ring->rx_headroom = XDP_PACKET_HEADROOM;
+ } else if (old_bpf_prog && !prog) {
+ ena_xdp_unregister_rxq_info(rx_ring);
+ rx_ring->rx_headroom = NET_SKB_PAD;
+ }
+ }
+}
+
+static void ena_xdp_exchange_program(struct ena_adapter *adapter,
+ struct bpf_prog *prog)
+{
+ struct bpf_prog *old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog);
+
+ ena_xdp_exchange_program_rx_in_range(adapter,
+ prog,
+ 0,
+ adapter->num_io_queues);
+
+ if (old_bpf_prog)
+ bpf_prog_put(old_bpf_prog);
+}
+
+static int ena_destroy_and_free_all_xdp_queues(struct ena_adapter *adapter)
+{
+ bool was_up;
+ int rc;
+
+ was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+
+ if (was_up)
+ ena_down(adapter);
+
+ adapter->xdp_first_ring = 0;
+ adapter->xdp_num_queues = 0;
+ ena_xdp_exchange_program(adapter, NULL);
+ if (was_up) {
+ rc = ena_up(adapter);
+ if (rc)
+ return rc;
+ }
+ return 0;
+}
+
+static int ena_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf)
+{
+ struct ena_adapter *adapter = netdev_priv(netdev);
+ struct bpf_prog *prog = bpf->prog;
+ struct bpf_prog *old_bpf_prog;
+ int rc, prev_mtu;
+ bool is_up;
+
+ is_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+ rc = ena_xdp_allowed(adapter);
+ if (rc == ENA_XDP_ALLOWED) {
+ old_bpf_prog = adapter->xdp_bpf_prog;
+ if (prog) {
+ if (!is_up) {
+ ena_init_all_xdp_queues(adapter);
+ } else if (!old_bpf_prog) {
+ ena_down(adapter);
+ ena_init_all_xdp_queues(adapter);
+ }
+ ena_xdp_exchange_program(adapter, prog);
+
+ if (is_up && !old_bpf_prog) {
+ rc = ena_up(adapter);
+ if (rc)
+ return rc;
+ }
+ xdp_features_set_redirect_target(netdev, false);
+ } else if (old_bpf_prog) {
+ xdp_features_clear_redirect_target(netdev);
+ rc = ena_destroy_and_free_all_xdp_queues(adapter);
+ if (rc)
+ return rc;
+ }
+
+ prev_mtu = netdev->max_mtu;
+ netdev->max_mtu = prog ? ENA_XDP_MAX_MTU : adapter->max_mtu;
+
+ if (!old_bpf_prog)
+ netif_info(adapter, drv, adapter->netdev,
+ "XDP program is set, changing the max_mtu from %d to %d",
+ prev_mtu, netdev->max_mtu);
+
+ } else if (rc == ENA_XDP_CURRENT_MTU_TOO_LARGE) {
+ netif_err(adapter, drv, adapter->netdev,
+ "Failed to set xdp program, the current MTU (%d) is larger than the maximum allowed MTU (%lu) while xdp is on",
+ netdev->mtu, ENA_XDP_MAX_MTU);
+ NL_SET_ERR_MSG_MOD(bpf->extack,
+ "Failed to set xdp program, the current MTU is larger than the maximum allowed MTU. Check the dmesg for more info");
+ return -EINVAL;
+ } else if (rc == ENA_XDP_NO_ENOUGH_QUEUES) {
+ netif_err(adapter, drv, adapter->netdev,
+ "Failed to set xdp program, the Rx/Tx channel count should be at most half of the maximum allowed channel count. The current queue count (%d), the maximal queue count (%d)\n",
+ adapter->num_io_queues, adapter->max_num_io_queues);
+ NL_SET_ERR_MSG_MOD(bpf->extack,
+ "Failed to set xdp program, there is no enough space for allocating XDP queues, Check the dmesg for more info");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* This is the main xdp callback, it's used by the kernel to set/unset the xdp
+ * program as well as to query the current xdp program id.
+ */
+int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf)
+{
+ switch (bpf->command) {
+ case XDP_SETUP_PROG:
+ return ena_xdp_set(netdev, bpf);
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int ena_clean_xdp_irq(struct ena_ring *tx_ring, u32 budget)
+{
+ u32 total_done = 0;
+ u16 next_to_clean;
+ int tx_pkts = 0;
+ u16 req_id;
+ int rc;
+
+ if (unlikely(!tx_ring))
+ return 0;
+ next_to_clean = tx_ring->next_to_clean;
+
+ while (tx_pkts < budget) {
+ struct ena_tx_buffer *tx_info;
+ struct xdp_frame *xdpf;
+
+ rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq,
+ &req_id);
+ if (rc) {
+ if (unlikely(rc == -EINVAL))
+ handle_invalid_req_id(tx_ring, req_id, NULL, true);
+ break;
+ }
+
+ /* validate that the request id points to a valid xdp_frame */
+ rc = validate_xdp_req_id(tx_ring, req_id);
+ if (rc)
+ break;
+
+ tx_info = &tx_ring->tx_buffer_info[req_id];
+ xdpf = tx_info->xdpf;
+
+ tx_info->xdpf = NULL;
+ tx_info->last_jiffies = 0;
+ ena_unmap_tx_buff(tx_ring, tx_info);
+
+ netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
+ "tx_poll: q %d skb %p completed\n", tx_ring->qid,
+ xdpf);
+
+ tx_pkts++;
+ total_done += tx_info->tx_descs;
+
+ xdp_return_frame(xdpf);
+ tx_ring->free_ids[next_to_clean] = req_id;
+ next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
+ tx_ring->ring_size);
+ }
+
+ tx_ring->next_to_clean = next_to_clean;
+ ena_com_comp_ack(tx_ring->ena_com_io_sq, total_done);
+ ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq);
+
+ netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
+ "tx_poll: q %d done. total pkts: %d\n",
+ tx_ring->qid, tx_pkts);
+
+ return tx_pkts;
+}
+
+/* This is the XDP napi callback. XDP queues use a separate napi callback
+ * than Rx/Tx queues.
+ */
+int ena_xdp_io_poll(struct napi_struct *napi, int budget)
+{
+ struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
+ u32 xdp_work_done, xdp_budget;
+ struct ena_ring *tx_ring;
+ int napi_comp_call = 0;
+ int ret;
+
+ tx_ring = ena_napi->tx_ring;
+
+ xdp_budget = budget;
+
+ if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
+ test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags)) {
+ napi_complete_done(napi, 0);
+ return 0;
+ }
+
+ xdp_work_done = ena_clean_xdp_irq(tx_ring, xdp_budget);
+
+ /* If the device is about to reset or down, avoid unmask
+ * the interrupt and return 0 so NAPI won't reschedule
+ */
+ if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags))) {
+ napi_complete_done(napi, 0);
+ ret = 0;
+ } else if (xdp_budget > xdp_work_done) {
+ napi_comp_call = 1;
+ if (napi_complete_done(napi, xdp_work_done))
+ ena_unmask_interrupt(tx_ring, NULL);
+ ena_update_ring_numa_node(tx_ring, NULL);
+ ret = xdp_work_done;
+ } else {
+ ret = xdp_budget;
+ }
+
+ u64_stats_update_begin(&tx_ring->syncp);
+ tx_ring->tx_stats.napi_comp += napi_comp_call;
+ tx_ring->tx_stats.tx_poll++;
+ u64_stats_update_end(&tx_ring->syncp);
+ tx_ring->tx_stats.last_napi_jiffies = jiffies;
+
+ return ret;
+}
diff --git a/drivers/net/ethernet/amazon/ena/ena_xdp.h b/drivers/net/ethernet/amazon/ena/ena_xdp.h
new file mode 100644
index 000000000000..3fa8e80b18a9
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_xdp.h
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright 2015-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef ENA_XDP_H
+#define ENA_XDP_H
+
+#include "ena_netdev.h"
+#include <linux/bpf_trace.h>
+
+/* The max MTU size is configured to be the ethernet frame size without
+ * the overhead of the ethernet header, which can have a VLAN header, and
+ * a frame check sequence (FCS).
+ * The buffer size we share with the device is defined to be ENA_PAGE_SIZE
+ */
+#define ENA_XDP_MAX_MTU (ENA_PAGE_SIZE - ETH_HLEN - ETH_FCS_LEN - \
+ VLAN_HLEN - XDP_PACKET_HEADROOM - \
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+
+#define ENA_IS_XDP_INDEX(adapter, index) (((index) >= (adapter)->xdp_first_ring) && \
+ ((index) < (adapter)->xdp_first_ring + (adapter)->xdp_num_queues))
+
+enum ENA_XDP_ACTIONS {
+ ENA_XDP_PASS = 0,
+ ENA_XDP_TX = BIT(0),
+ ENA_XDP_REDIRECT = BIT(1),
+ ENA_XDP_DROP = BIT(2)
+};
+
+#define ENA_XDP_FORWARDED (ENA_XDP_TX | ENA_XDP_REDIRECT)
+
+int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter);
+void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter,
+ struct bpf_prog *prog,
+ int first, int count);
+int ena_xdp_io_poll(struct napi_struct *napi, int budget);
+int ena_xdp_xmit_frame(struct ena_ring *tx_ring,
+ struct ena_adapter *adapter,
+ struct xdp_frame *xdpf,
+ int flags);
+int ena_xdp_xmit(struct net_device *dev, int n,
+ struct xdp_frame **frames, u32 flags);
+int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf);
+
+enum ena_xdp_errors_t {
+ ENA_XDP_ALLOWED = 0,
+ ENA_XDP_CURRENT_MTU_TOO_LARGE,
+ ENA_XDP_NO_ENOUGH_QUEUES,
+};
+
+static inline bool ena_xdp_present(struct ena_adapter *adapter)
+{
+ return !!adapter->xdp_bpf_prog;
+}
+
+static inline bool ena_xdp_present_ring(struct ena_ring *ring)
+{
+ return !!ring->xdp_bpf_prog;
+}
+
+static inline bool ena_xdp_legal_queue_count(struct ena_adapter *adapter,
+ u32 queues)
+{
+ return 2 * queues <= adapter->max_num_io_queues;
+}
+
+static inline enum ena_xdp_errors_t ena_xdp_allowed(struct ena_adapter *adapter)
+{
+ enum ena_xdp_errors_t rc = ENA_XDP_ALLOWED;
+
+ if (adapter->netdev->mtu > ENA_XDP_MAX_MTU)
+ rc = ENA_XDP_CURRENT_MTU_TOO_LARGE;
+ else if (!ena_xdp_legal_queue_count(adapter, adapter->num_io_queues))
+ rc = ENA_XDP_NO_ENOUGH_QUEUES;
+
+ return rc;
+}
+
+static inline int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
+{
+ u32 verdict = ENA_XDP_PASS;
+ struct bpf_prog *xdp_prog;
+ struct ena_ring *xdp_ring;
+ struct xdp_frame *xdpf;
+ u64 *xdp_stat;
+
+ xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog);
+
+ if (!xdp_prog)
+ return verdict;
+
+ verdict = bpf_prog_run_xdp(xdp_prog, xdp);
+
+ switch (verdict) {
+ case XDP_TX:
+ xdpf = xdp_convert_buff_to_frame(xdp);
+ if (unlikely(!xdpf)) {
+ trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
+ xdp_stat = &rx_ring->rx_stats.xdp_aborted;
+ verdict = ENA_XDP_DROP;
+ break;
+ }
+
+ /* Find xmit queue */
+ xdp_ring = rx_ring->xdp_ring;
+
+ /* The XDP queues are shared between XDP_TX and XDP_REDIRECT */
+ spin_lock(&xdp_ring->xdp_tx_lock);
+
+ if (ena_xdp_xmit_frame(xdp_ring, rx_ring->adapter, xdpf,
+ XDP_XMIT_FLUSH))
+ xdp_return_frame(xdpf);
+
+ spin_unlock(&xdp_ring->xdp_tx_lock);
+ xdp_stat = &rx_ring->rx_stats.xdp_tx;
+ verdict = ENA_XDP_TX;
+ break;
+ case XDP_REDIRECT:
+ if (likely(!xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog))) {
+ xdp_stat = &rx_ring->rx_stats.xdp_redirect;
+ verdict = ENA_XDP_REDIRECT;
+ break;
+ }
+ trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
+ xdp_stat = &rx_ring->rx_stats.xdp_aborted;
+ verdict = ENA_XDP_DROP;
+ break;
+ case XDP_ABORTED:
+ trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
+ xdp_stat = &rx_ring->rx_stats.xdp_aborted;
+ verdict = ENA_XDP_DROP;
+ break;
+ case XDP_DROP:
+ xdp_stat = &rx_ring->rx_stats.xdp_drop;
+ verdict = ENA_XDP_DROP;
+ break;
+ case XDP_PASS:
+ xdp_stat = &rx_ring->rx_stats.xdp_pass;
+ verdict = ENA_XDP_PASS;
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, verdict);
+ xdp_stat = &rx_ring->rx_stats.xdp_invalid;
+ verdict = ENA_XDP_DROP;
+ }
+
+ ena_increase_stat(xdp_stat, 1, &rx_ring->syncp);
+
+ return verdict;
+}
+#endif /* ENA_XDP_H */
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index dac4f9510c17..38e3b2225ff1 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -10549,6 +10549,8 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
/* VF-reps may need to be re-opened after the PF is re-opened */
if (BNXT_PF(bp))
bnxt_vf_reps_open(bp);
+ if (bp->ptp_cfg)
+ atomic_set(&bp->ptp_cfg->tx_avail, BNXT_MAX_TX_TS);
bnxt_ptp_init_rtc(bp, true);
bnxt_ptp_cfg_tstamp_filters(bp);
return 0;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index 6ba2b9398633..7689086371e0 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -213,6 +213,9 @@ void bnxt_ulp_start(struct bnxt *bp, int err)
if (err)
return;

+ if (edev->ulp_tbl->msix_requested)
+ bnxt_fill_msix_vecs(bp, edev->msix_entries);
+
if (aux_priv) {
struct auxiliary_device *adev;

@@ -394,12 +397,13 @@ void bnxt_rdma_aux_device_init(struct bnxt *bp)
if (!edev)
goto aux_dev_uninit;

+ aux_priv->edev = edev;
+
ulp = kzalloc(sizeof(*ulp), GFP_KERNEL);
if (!ulp)
goto aux_dev_uninit;

edev->ulp_tbl = ulp;
- aux_priv->edev = edev;
bp->edev = edev;
bnxt_set_edev_info(edev, bp);

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index 58744313f0eb..f6f6d7c04e8b 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -4465,18 +4465,18 @@ static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw)
*/
rvu_write64(rvu, blkaddr, NIX_AF_CFG,
rvu_read64(rvu, blkaddr, NIX_AF_CFG) | 0x40ULL);
+ }

- /* Set chan/link to backpressure TL3 instead of TL2 */
- rvu_write64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL, 0x01);
+ /* Set chan/link to backpressure TL3 instead of TL2 */
+ rvu_write64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL, 0x01);

- /* Disable SQ manager's sticky mode operation (set TM6 = 0)
- * This sticky mode is known to cause SQ stalls when multiple
- * SQs are mapped to same SMQ and transmitting pkts at a time.
- */
- cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS);
- cfg &= ~BIT_ULL(15);
- rvu_write64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS, cfg);
- }
+ /* Disable SQ manager's sticky mode operation (set TM6 = 0)
+ * This sticky mode is known to cause SQ stalls when multiple
+ * SQs are mapped to same SMQ and transmitting pkts at a time.
+ */
+ cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS);
+ cfg &= ~BIT_ULL(15);
+ rvu_write64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS, cfg);

ltdefs = rvu->kpu.lt_def;
/* Calibrate X2P bus to check if CGX/LBK links are fine */
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
index 1e77bbf5d22a..1723e9912ae0 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
@@ -382,6 +382,7 @@ static void otx2_qos_read_txschq_cfg_tl(struct otx2_qos_node *parent,
otx2_qos_read_txschq_cfg_tl(node, cfg);
cnt = cfg->static_node_pos[node->level];
cfg->schq_contig_list[node->level][cnt] = node->schq;
+ cfg->schq_index_used[node->level][cnt] = true;
cfg->schq_contig[node->level]++;
cfg->static_node_pos[node->level]++;
otx2_qos_read_txschq_cfg_schq(node, cfg);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
index 7b700d0f956a..b171cd8f11e0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
@@ -95,9 +95,15 @@ static inline void mlx5e_ptp_metadata_fifo_push(struct mlx5e_ptp_metadata_fifo *
}

static inline u8
+mlx5e_ptp_metadata_fifo_peek(struct mlx5e_ptp_metadata_fifo *fifo)
+{
+ return fifo->data[fifo->mask & fifo->cc];
+}
+
+static inline void
mlx5e_ptp_metadata_fifo_pop(struct mlx5e_ptp_metadata_fifo *fifo)
{
- return fifo->data[fifo->mask & fifo->cc++];
+ fifo->cc++;
}

static inline void
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
index 244bc15a42ab..d9acc37afe1c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
@@ -82,24 +82,25 @@ int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs,

txq_ix = mlx5e_qid_from_qos(chs, node_qid);

- WARN_ON(node_qid > priv->htb_max_qos_sqs);
- if (node_qid == priv->htb_max_qos_sqs) {
- struct mlx5e_sq_stats *stats, **stats_list = NULL;
-
- if (priv->htb_max_qos_sqs == 0) {
- stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev),
- sizeof(*stats_list),
- GFP_KERNEL);
- if (!stats_list)
- return -ENOMEM;
- }
+ WARN_ON(node_qid >= mlx5e_htb_cur_leaf_nodes(priv->htb));
+ if (!priv->htb_qos_sq_stats) {
+ struct mlx5e_sq_stats **stats_list;
+
+ stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev),
+ sizeof(*stats_list), GFP_KERNEL);
+ if (!stats_list)
+ return -ENOMEM;
+
+ WRITE_ONCE(priv->htb_qos_sq_stats, stats_list);
+ }
+
+ if (!priv->htb_qos_sq_stats[node_qid]) {
+ struct mlx5e_sq_stats *stats;
+
stats = kzalloc(sizeof(*stats), GFP_KERNEL);
- if (!stats) {
- kvfree(stats_list);
+ if (!stats)
return -ENOMEM;
- }
- if (stats_list)
- WRITE_ONCE(priv->htb_qos_sq_stats, stats_list);
+
WRITE_ONCE(priv->htb_qos_sq_stats[node_qid], stats);
/* Order htb_max_qos_sqs increment after writing the array pointer.
* Pairs with smp_load_acquire in en_stats.c.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c
index f675b1926340..f66bbc846464 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c
@@ -57,6 +57,7 @@ int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock)

void mlx5e_selq_cleanup(struct mlx5e_selq *selq)
{
+ mutex_lock(selq->state_lock);
WARN_ON_ONCE(selq->is_prepared);

kvfree(selq->standby);
@@ -67,6 +68,7 @@ void mlx5e_selq_cleanup(struct mlx5e_selq *selq)

kvfree(selq->standby);
selq->standby = NULL;
+ mutex_unlock(selq->state_lock);
}

void mlx5e_selq_prepare_params(struct mlx5e_selq *selq, struct mlx5e_params *params)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index c3961c2bbc57..d49c348f89d2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -5694,9 +5694,7 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv)
kfree(priv->tx_rates);
kfree(priv->txq2sq);
destroy_workqueue(priv->wq);
- mutex_lock(&priv->state_lock);
mlx5e_selq_cleanup(&priv->selq);
- mutex_unlock(&priv->state_lock);
free_cpumask_var(priv->scratchpad.cpumask);

for (i = 0; i < priv->htb_max_qos_sqs; i++)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 1ead69c5f5fa..24cbd44dae93 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -398,6 +398,8 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))) {
u8 metadata_index = be32_to_cpu(eseg->flow_table_metadata);

+ mlx5e_ptp_metadata_fifo_pop(&sq->ptpsq->metadata_freelist);
+
mlx5e_skb_cb_hwtstamp_init(skb);
mlx5e_ptp_metadata_map_put(&sq->ptpsq->metadata_map, skb,
metadata_index);
@@ -496,9 +498,6 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,

err_drop:
stats->dropped++;
- if (unlikely(sq->ptpsq && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
- mlx5e_ptp_metadata_fifo_push(&sq->ptpsq->metadata_freelist,
- be32_to_cpu(eseg->flow_table_metadata));
dev_kfree_skb_any(skb);
mlx5e_tx_flush(sq);
}
@@ -657,7 +656,7 @@ static void mlx5e_cqe_ts_id_eseg(struct mlx5e_ptpsq *ptpsq, struct sk_buff *skb,
{
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
eseg->flow_table_metadata =
- cpu_to_be32(mlx5e_ptp_metadata_fifo_pop(&ptpsq->metadata_freelist));
+ cpu_to_be32(mlx5e_ptp_metadata_fifo_peek(&ptpsq->metadata_freelist));
}

static void mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index a13b9c2bd144..e2f7cecce6f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1664,6 +1664,16 @@ static int create_auto_flow_group(struct mlx5_flow_table *ft,
return err;
}

+static bool mlx5_pkt_reformat_cmp(struct mlx5_pkt_reformat *p1,
+ struct mlx5_pkt_reformat *p2)
+{
+ return p1->owner == p2->owner &&
+ (p1->owner == MLX5_FLOW_RESOURCE_OWNER_FW ?
+ p1->id == p2->id :
+ mlx5_fs_dr_action_get_pkt_reformat_id(p1) ==
+ mlx5_fs_dr_action_get_pkt_reformat_id(p2));
+}
+
static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
struct mlx5_flow_destination *d2)
{
@@ -1675,8 +1685,8 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
((d1->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID) ?
(d1->vport.vhca_id == d2->vport.vhca_id) : true) &&
((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ?
- (d1->vport.pkt_reformat->id ==
- d2->vport.pkt_reformat->id) : true)) ||
+ mlx5_pkt_reformat_cmp(d1->vport.pkt_reformat,
+ d2->vport.pkt_reformat) : true)) ||
(d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
d1->ft == d2->ft) ||
(d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
@@ -1808,8 +1818,9 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
}
trace_mlx5_fs_set_fte(fte, false);

+ /* Link newly added rules into the tree. */
for (i = 0; i < handle->num_rules; i++) {
- if (refcount_read(&handle->rule[i]->node.refcount) == 1) {
+ if (!handle->rule[i]->node.parent) {
tree_add_node(&handle->rule[i]->node, &fte->node);
trace_mlx5_fs_add_rule(handle->rule[i]);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 6ca91c0e8a6a..9710ddac1f1a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1469,6 +1469,14 @@ int mlx5_init_one_devl_locked(struct mlx5_core_dev *dev)
if (err)
goto err_register;

+ err = mlx5_crdump_enable(dev);
+ if (err)
+ mlx5_core_err(dev, "mlx5_crdump_enable failed with error code %d\n", err);
+
+ err = mlx5_hwmon_dev_register(dev);
+ if (err)
+ mlx5_core_err(dev, "mlx5_hwmon_dev_register failed with error code %d\n", err);
+
mutex_unlock(&dev->intf_state_mutex);
return 0;

@@ -1494,7 +1502,10 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
int err;

devl_lock(devlink);
+ devl_register(devlink);
err = mlx5_init_one_devl_locked(dev);
+ if (err)
+ devl_unregister(devlink);
devl_unlock(devlink);
return err;
}
@@ -1506,6 +1517,8 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev)
devl_lock(devlink);
mutex_lock(&dev->intf_state_mutex);

+ mlx5_hwmon_dev_unregister(dev);
+ mlx5_crdump_disable(dev);
mlx5_unregister_device(dev);

if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
@@ -1523,6 +1536,7 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev)
mlx5_function_teardown(dev, true);
out:
mutex_unlock(&dev->intf_state_mutex);
+ devl_unregister(devlink);
devl_unlock(devlink);
}

@@ -1669,16 +1683,20 @@ int mlx5_init_one_light(struct mlx5_core_dev *dev)
}

devl_lock(devlink);
+ devl_register(devlink);
+
err = mlx5_devlink_params_register(priv_to_devlink(dev));
- devl_unlock(devlink);
if (err) {
mlx5_core_warn(dev, "mlx5_devlink_param_reg err = %d\n", err);
goto query_hca_caps_err;
}

+ devl_unlock(devlink);
return 0;

query_hca_caps_err:
+ devl_unregister(devlink);
+ devl_unlock(devlink);
mlx5_function_disable(dev, true);
out:
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
@@ -1691,6 +1709,7 @@ void mlx5_uninit_one_light(struct mlx5_core_dev *dev)

devl_lock(devlink);
mlx5_devlink_params_unregister(priv_to_devlink(dev));
+ devl_unregister(devlink);
devl_unlock(devlink);
if (dev->state != MLX5_DEVICE_STATE_UP)
return;
@@ -1932,16 +1951,7 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
goto err_init_one;
}

- err = mlx5_crdump_enable(dev);
- if (err)
- dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err);
-
- err = mlx5_hwmon_dev_register(dev);
- if (err)
- mlx5_core_err(dev, "mlx5_hwmon_dev_register failed with error code %d\n", err);
-
pci_save_state(pdev);
- devlink_register(devlink);
return 0;

err_init_one:
@@ -1962,16 +1972,9 @@ static void remove_one(struct pci_dev *pdev)
struct devlink *devlink = priv_to_devlink(dev);

set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
- /* mlx5_drain_fw_reset() and mlx5_drain_health_wq() are using
- * devlink notify APIs.
- * Hence, we must drain them before unregistering the devlink.
- */
mlx5_drain_fw_reset(dev);
mlx5_drain_health_wq(dev);
- devlink_unregister(devlink);
mlx5_sriov_disable(pdev, false);
- mlx5_hwmon_dev_unregister(dev);
- mlx5_crdump_disable(dev);
mlx5_uninit_one(dev);
mlx5_pci_close(dev);
mlx5_mdev_uninit(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 4dcf995cb1a2..6bac8ad70ba6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -19,6 +19,7 @@
#define MLX5_IRQ_CTRL_SF_MAX 8
/* min num of vectors for SFs to be enabled */
#define MLX5_IRQ_VEC_COMP_BASE_SF 2
+#define MLX5_IRQ_VEC_COMP_BASE 1

#define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
#define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
@@ -246,6 +247,7 @@ static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
return;
}

+ vecidx -= MLX5_IRQ_VEC_COMP_BASE;
snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
}

@@ -585,7 +587,7 @@ struct mlx5_irq *mlx5_irq_request_vector(struct mlx5_core_dev *dev, u16 cpu,
struct mlx5_irq_table *table = mlx5_irq_table_get(dev);
struct mlx5_irq_pool *pool = table->pcif_pool;
struct irq_affinity_desc af_desc;
- int offset = 1;
+ int offset = MLX5_IRQ_VEC_COMP_BASE;

if (!pool->xa_num_irqs.max)
offset = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
index 8fe82f1191bb..30218f37d528 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
@@ -69,24 +69,28 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia
static void mlx5_sf_dev_remove(struct auxiliary_device *adev)
{
struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);
- struct devlink *devlink = priv_to_devlink(sf_dev->mdev);
+ struct mlx5_core_dev *mdev = sf_dev->mdev;
+ struct devlink *devlink;

- mlx5_drain_health_wq(sf_dev->mdev);
- devlink_unregister(devlink);
- if (mlx5_dev_is_lightweight(sf_dev->mdev))
- mlx5_uninit_one_light(sf_dev->mdev);
+ devlink = priv_to_devlink(mdev);
+ set_bit(MLX5_BREAK_FW_WAIT, &mdev->intf_state);
+ mlx5_drain_health_wq(mdev);
+ if (mlx5_dev_is_lightweight(mdev))
+ mlx5_uninit_one_light(mdev);
else
- mlx5_uninit_one(sf_dev->mdev);
- iounmap(sf_dev->mdev->iseg);
- mlx5_mdev_uninit(sf_dev->mdev);
+ mlx5_uninit_one(mdev);
+ iounmap(mdev->iseg);
+ mlx5_mdev_uninit(mdev);
mlx5_devlink_free(devlink);
}

static void mlx5_sf_dev_shutdown(struct auxiliary_device *adev)
{
struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);
+ struct mlx5_core_dev *mdev = sf_dev->mdev;

- mlx5_unload_one(sf_dev->mdev, false);
+ set_bit(MLX5_BREAK_FW_WAIT, &mdev->intf_state);
+ mlx5_unload_one(mdev, false);
}

static const struct auxiliary_device_id mlx5_sf_dev_id_table[] = {
diff --git a/drivers/net/ethernet/micrel/ks8851.h b/drivers/net/ethernet/micrel/ks8851.h
index e5ec0a363aff..31f75b4a67fd 100644
--- a/drivers/net/ethernet/micrel/ks8851.h
+++ b/drivers/net/ethernet/micrel/ks8851.h
@@ -368,7 +368,6 @@ union ks8851_tx_hdr {
* @rdfifo: FIFO read callback
* @wrfifo: FIFO write callback
* @start_xmit: start_xmit() implementation callback
- * @rx_skb: rx_skb() implementation callback
* @flush_tx_work: flush_tx_work() implementation callback
*
* The @statelock is used to protect information in the structure which may
@@ -423,8 +422,6 @@ struct ks8851_net {
struct sk_buff *txp, bool irq);
netdev_tx_t (*start_xmit)(struct sk_buff *skb,
struct net_device *dev);
- void (*rx_skb)(struct ks8851_net *ks,
- struct sk_buff *skb);
void (*flush_tx_work)(struct ks8851_net *ks);
};

diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c
index 0bf13b38b8f5..d4cdf3d4f552 100644
--- a/drivers/net/ethernet/micrel/ks8851_common.c
+++ b/drivers/net/ethernet/micrel/ks8851_common.c
@@ -231,16 +231,6 @@ static void ks8851_dbg_dumpkkt(struct ks8851_net *ks, u8 *rxpkt)
rxpkt[12], rxpkt[13], rxpkt[14], rxpkt[15]);
}

-/**
- * ks8851_rx_skb - receive skbuff
- * @ks: The device state.
- * @skb: The skbuff
- */
-static void ks8851_rx_skb(struct ks8851_net *ks, struct sk_buff *skb)
-{
- ks->rx_skb(ks, skb);
-}
-
/**
* ks8851_rx_pkts - receive packets from the host
* @ks: The device information.
@@ -309,7 +299,7 @@ static void ks8851_rx_pkts(struct ks8851_net *ks)
ks8851_dbg_dumpkkt(ks, rxpkt);

skb->protocol = eth_type_trans(skb, ks->netdev);
- ks8851_rx_skb(ks, skb);
+ __netif_rx(skb);

ks->netdev->stats.rx_packets++;
ks->netdev->stats.rx_bytes += rxlen;
@@ -340,6 +330,8 @@ static irqreturn_t ks8851_irq(int irq, void *_ks)
unsigned long flags;
unsigned int status;

+ local_bh_disable();
+
ks8851_lock(ks, &flags);

status = ks8851_rdreg16(ks, KS_ISR);
@@ -416,6 +408,8 @@ static irqreturn_t ks8851_irq(int irq, void *_ks)
if (status & IRQ_LCI)
mii_check_link(&ks->mii);

+ local_bh_enable();
+
return IRQ_HANDLED;
}

diff --git a/drivers/net/ethernet/micrel/ks8851_par.c b/drivers/net/ethernet/micrel/ks8851_par.c
index 7f49042484bd..96fb0ffcedb9 100644
--- a/drivers/net/ethernet/micrel/ks8851_par.c
+++ b/drivers/net/ethernet/micrel/ks8851_par.c
@@ -210,16 +210,6 @@ static void ks8851_wrfifo_par(struct ks8851_net *ks, struct sk_buff *txp,
iowrite16_rep(ksp->hw_addr, txp->data, len / 2);
}

-/**
- * ks8851_rx_skb_par - receive skbuff
- * @ks: The device state.
- * @skb: The skbuff
- */
-static void ks8851_rx_skb_par(struct ks8851_net *ks, struct sk_buff *skb)
-{
- netif_rx(skb);
-}
-
static unsigned int ks8851_rdreg16_par_txqcr(struct ks8851_net *ks)
{
return ks8851_rdreg16_par(ks, KS_TXQCR);
@@ -298,7 +288,6 @@ static int ks8851_probe_par(struct platform_device *pdev)
ks->rdfifo = ks8851_rdfifo_par;
ks->wrfifo = ks8851_wrfifo_par;
ks->start_xmit = ks8851_start_xmit_par;
- ks->rx_skb = ks8851_rx_skb_par;

#define STD_IRQ (IRQ_LCI | /* Link Change */ \
IRQ_RXI | /* RX done */ \
diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c
index 88e26c120b48..4dcbff789b19 100644
--- a/drivers/net/ethernet/micrel/ks8851_spi.c
+++ b/drivers/net/ethernet/micrel/ks8851_spi.c
@@ -298,16 +298,6 @@ static unsigned int calc_txlen(unsigned int len)
return ALIGN(len + 4, 4);
}

-/**
- * ks8851_rx_skb_spi - receive skbuff
- * @ks: The device state
- * @skb: The skbuff
- */
-static void ks8851_rx_skb_spi(struct ks8851_net *ks, struct sk_buff *skb)
-{
- netif_rx(skb);
-}
-
/**
* ks8851_tx_work - process tx packet(s)
* @work: The work strucutre what was scheduled.
@@ -435,7 +425,6 @@ static int ks8851_probe_spi(struct spi_device *spi)
ks->rdfifo = ks8851_rdfifo_spi;
ks->wrfifo = ks8851_wrfifo_spi;
ks->start_xmit = ks8851_start_xmit_spi;
- ks->rx_skb = ks8851_rx_skb_spi;
ks->flush_tx_work = ks8851_flush_tx_work_spi;

#define STD_IRQ (IRQ_LCI | /* Link Change */ \
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c
index 3a1b1a1f5a19..60dd2fd603a8 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c
@@ -731,7 +731,7 @@ static int sparx5_port_pcs_low_set(struct sparx5 *sparx5,
bool sgmii = false, inband_aneg = false;
int err;

- if (port->conf.inband) {
+ if (conf->inband) {
if (conf->portmode == PHY_INTERFACE_MODE_SGMII ||
conf->portmode == PHY_INTERFACE_MODE_QSGMII)
inband_aneg = true; /* Cisco-SGMII in-band-aneg */
@@ -948,7 +948,7 @@ int sparx5_port_pcs_set(struct sparx5 *sparx5,
if (err)
return -EINVAL;

- if (port->conf.inband) {
+ if (conf->inband) {
/* Enable/disable 1G counters in ASM */
spx5_rmw(ASM_PORT_CFG_CSC_STAT_DIS_SET(high_speed_dev),
ASM_PORT_CFG_CSC_STAT_DIS,
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 0204ac92b05a..0a18b67d0d66 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -926,7 +926,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
__be16 sport;
int err;

- if (!pskb_inet_may_pull(skb))
+ if (!skb_vlan_inet_prepare(skb))
return -EINVAL;

sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
@@ -1024,7 +1024,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
__be16 sport;
int err;

- if (!pskb_inet_may_pull(skb))
+ if (!skb_vlan_inet_prepare(skb))
return -EINVAL;

sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
diff --git a/drivers/platform/chrome/cros_ec_uart.c b/drivers/platform/chrome/cros_ec_uart.c
index 788246559bbb..823371037af7 100644
--- a/drivers/platform/chrome/cros_ec_uart.c
+++ b/drivers/platform/chrome/cros_ec_uart.c
@@ -264,12 +264,6 @@ static int cros_ec_uart_probe(struct serdev_device *serdev)
if (!ec_dev)
return -ENOMEM;

- ret = devm_serdev_device_open(dev, serdev);
- if (ret) {
- dev_err(dev, "Unable to open UART device");
- return ret;
- }
-
serdev_device_set_drvdata(serdev, ec_dev);
init_waitqueue_head(&ec_uart->response.wait_queue);

@@ -281,14 +275,6 @@ static int cros_ec_uart_probe(struct serdev_device *serdev)
return ret;
}

- ret = serdev_device_set_baudrate(serdev, ec_uart->baudrate);
- if (ret < 0) {
- dev_err(dev, "Failed to set up host baud rate (%d)", ret);
- return ret;
- }
-
- serdev_device_set_flow_control(serdev, ec_uart->flowcontrol);
-
/* Initialize ec_dev for cros_ec */
ec_dev->phys_name = dev_name(dev);
ec_dev->dev = dev;
@@ -302,6 +288,20 @@ static int cros_ec_uart_probe(struct serdev_device *serdev)

serdev_device_set_client_ops(serdev, &cros_ec_uart_client_ops);

+ ret = devm_serdev_device_open(dev, serdev);
+ if (ret) {
+ dev_err(dev, "Unable to open UART device");
+ return ret;
+ }
+
+ ret = serdev_device_set_baudrate(serdev, ec_uart->baudrate);
+ if (ret < 0) {
+ dev_err(dev, "Failed to set up host baud rate (%d)", ret);
+ return ret;
+ }
+
+ serdev_device_set_flow_control(serdev, ec_uart->flowcontrol);
+
return cros_ec_register(ec_dev);
}

diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index b155ac800979..e4363b8c6ad2 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -1792,7 +1792,7 @@ static int hisi_sas_debug_I_T_nexus_reset(struct domain_device *device)
if (dev_is_sata(device)) {
struct ata_link *link = &device->sata_dev.ap->link;

- rc = ata_wait_after_reset(link, HISI_SAS_WAIT_PHYUP_TIMEOUT,
+ rc = ata_wait_after_reset(link, jiffies + HISI_SAS_WAIT_PHYUP_TIMEOUT,
smp_ata_check_ready_type);
} else {
msleep(2000);
diff --git a/drivers/scsi/qla2xxx/qla_edif.c b/drivers/scsi/qla2xxx/qla_edif.c
index 26e6b3e3af43..dcde55c8ee5d 100644
--- a/drivers/scsi/qla2xxx/qla_edif.c
+++ b/drivers/scsi/qla2xxx/qla_edif.c
@@ -1100,7 +1100,7 @@ qla_edif_app_getstats(scsi_qla_host_t *vha, struct bsg_job *bsg_job)

list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) {
if (fcport->edif.enable) {
- if (pcnt > app_req.num_ports)
+ if (pcnt >= app_req.num_ports)
break;

app_reply->elem[pcnt].rekey_count =
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 0d8afffd1683..e6d8beb87776 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -285,6 +285,7 @@ sg_open(struct inode *inode, struct file *filp)
int dev = iminor(inode);
int flags = filp->f_flags;
struct request_queue *q;
+ struct scsi_device *device;
Sg_device *sdp;
Sg_fd *sfp;
int retval;
@@ -301,11 +302,12 @@ sg_open(struct inode *inode, struct file *filp)

/* This driver's module count bumped by fops_get in <linux/fs.h> */
/* Prevent the device driver from vanishing while we sleep */
- retval = scsi_device_get(sdp->device);
+ device = sdp->device;
+ retval = scsi_device_get(device);
if (retval)
goto sg_put;

- retval = scsi_autopm_get_device(sdp->device);
+ retval = scsi_autopm_get_device(device);
if (retval)
goto sdp_put;

@@ -313,7 +315,7 @@ sg_open(struct inode *inode, struct file *filp)
* check if O_NONBLOCK. Permits SCSI commands to be issued
* during error recovery. Tread carefully. */
if (!((flags & O_NONBLOCK) ||
- scsi_block_when_processing_errors(sdp->device))) {
+ scsi_block_when_processing_errors(device))) {
retval = -ENXIO;
/* we are in error recovery for this device */
goto error_out;
@@ -344,7 +346,7 @@ sg_open(struct inode *inode, struct file *filp)

if (sdp->open_cnt < 1) { /* no existing opens */
sdp->sgdebug = 0;
- q = sdp->device->request_queue;
+ q = device->request_queue;
sdp->sg_tablesize = queue_max_segments(q);
}
sfp = sg_add_sfp(sdp);
@@ -370,10 +372,11 @@ sg_open(struct inode *inode, struct file *filp)
error_mutex_locked:
mutex_unlock(&sdp->open_rel_lock);
error_out:
- scsi_autopm_put_device(sdp->device);
+ scsi_autopm_put_device(device);
sdp_put:
- scsi_device_put(sdp->device);
- goto sg_put;
+ kref_put(&sdp->d_ref, sg_device_destroy);
+ scsi_device_put(device);
+ return retval;
}

/* Release resources associated with a successful sg_open()
@@ -2208,6 +2211,7 @@ sg_remove_sfp_usercontext(struct work_struct *work)
{
struct sg_fd *sfp = container_of(work, struct sg_fd, ew.work);
struct sg_device *sdp = sfp->parentdp;
+ struct scsi_device *device = sdp->device;
Sg_request *srp;
unsigned long iflags;

@@ -2233,8 +2237,8 @@ sg_remove_sfp_usercontext(struct work_struct *work)
"sg_remove_sfp: sfp=0x%p\n", sfp));
kfree(sfp);

- scsi_device_put(sdp->device);
kref_put(&sdp->d_ref, sg_device_destroy);
+ scsi_device_put(device);
module_put(THIS_MODULE);
}

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index e0c181ad17e3..88362c0afe45 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -2799,9 +2799,19 @@ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
r = vhost_get_avail_idx(vq, &avail_idx);
if (unlikely(r))
return false;
+
vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
+ if (vq->avail_idx != vq->last_avail_idx) {
+ /* Since we have updated avail_idx, the following
+ * call to vhost_get_vq_desc() will read available
+ * ring entries. Make sure that read happens after
+ * the avail_idx read.
+ */
+ smp_rmb();
+ return false;
+ }

- return vq->avail_idx == vq->last_avail_idx;
+ return true;
}
EXPORT_SYMBOL_GPL(vhost_vq_avail_empty);

@@ -2838,9 +2848,19 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
&vq->avail->idx, r);
return false;
}
+
vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
+ if (vq->avail_idx != vq->last_avail_idx) {
+ /* Since we have updated avail_idx, the following
+ * call to vhost_get_vq_desc() will read available
+ * ring entries. Make sure that read happens after
+ * the avail_idx read.
+ */
+ smp_rmb();
+ return true;
+ }

- return vq->avail_idx != vq->last_avail_idx;
+ return false;
}
EXPORT_SYMBOL_GPL(vhost_enable_notify);

diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 16f9e5f474cc..6d562f18d3f8 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1120,6 +1120,9 @@ __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
if (ret)
return ret;

+ ret = btrfs_record_root_in_trans(trans, node->root);
+ if (ret)
+ return ret;
ret = btrfs_update_delayed_inode(trans, node->root, path, node);
return ret;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1ac14223ffb5..e57d18825a56 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4460,6 +4460,7 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
struct btrfs_trans_handle *trans;
struct btrfs_block_rsv block_rsv;
u64 root_flags;
+ u64 qgroup_reserved = 0;
int ret;

down_write(&fs_info->subvol_sem);
@@ -4504,12 +4505,20 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
if (ret)
goto out_undead;
+ qgroup_reserved = block_rsv.qgroup_rsv_reserved;

trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out_release;
}
+ ret = btrfs_record_root_in_trans(trans, root);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ goto out_end_trans;
+ }
+ btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
+ qgroup_reserved = 0;
trans->block_rsv = &block_rsv;
trans->bytes_reserved = block_rsv.size;

@@ -4568,7 +4577,9 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
ret = btrfs_end_transaction(trans);
inode->i_flags |= S_DEAD;
out_release:
- btrfs_subvolume_release_metadata(root, &block_rsv);
+ btrfs_block_rsv_release(fs_info, &block_rsv, (u64)-1, NULL);
+ if (qgroup_reserved)
+ btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
out_undead:
if (ret) {
spin_lock(&dest->root_item_lock);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 839e579268dc..3f43a08613d8 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -603,6 +603,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
int ret;
dev_t anon_dev;
u64 objectid;
+ u64 qgroup_reserved = 0;

root_item = kzalloc(sizeof(*root_item), GFP_KERNEL);
if (!root_item)
@@ -640,13 +641,18 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
trans_num_items, false);
if (ret)
goto out_new_inode_args;
+ qgroup_reserved = block_rsv.qgroup_rsv_reserved;

trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
- btrfs_subvolume_release_metadata(root, &block_rsv);
- goto out_new_inode_args;
+ goto out_release_rsv;
}
+ ret = btrfs_record_root_in_trans(trans, BTRFS_I(dir)->root);
+ if (ret)
+ goto out;
+ btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
+ qgroup_reserved = 0;
trans->block_rsv = &block_rsv;
trans->bytes_reserved = block_rsv.size;
/* Tree log can't currently deal with an inode which is a new root. */
@@ -757,9 +763,11 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
out:
trans->block_rsv = NULL;
trans->bytes_reserved = 0;
- btrfs_subvolume_release_metadata(root, &block_rsv);
-
btrfs_end_transaction(trans);
+out_release_rsv:
+ btrfs_block_rsv_release(fs_info, &block_rsv, (u64)-1, NULL);
+ if (qgroup_reserved)
+ btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
out_new_inode_args:
btrfs_new_inode_args_destroy(&new_inode_args);
out_inode:
@@ -781,6 +789,8 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
struct btrfs_pending_snapshot *pending_snapshot;
unsigned int trans_num_items;
struct btrfs_trans_handle *trans;
+ struct btrfs_block_rsv *block_rsv;
+ u64 qgroup_reserved = 0;
int ret;

/* We do not support snapshotting right now. */
@@ -817,19 +827,19 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
goto free_pending;
}

- btrfs_init_block_rsv(&pending_snapshot->block_rsv,
- BTRFS_BLOCK_RSV_TEMP);
+ block_rsv = &pending_snapshot->block_rsv;
+ btrfs_init_block_rsv(block_rsv, BTRFS_BLOCK_RSV_TEMP);
/*
* 1 to add dir item
* 1 to add dir index
* 1 to update parent inode item
*/
trans_num_items = create_subvol_num_items(inherit) + 3;
- ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root,
- &pending_snapshot->block_rsv,
+ ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root, block_rsv,
trans_num_items, false);
if (ret)
goto free_pending;
+ qgroup_reserved = block_rsv->qgroup_rsv_reserved;

pending_snapshot->dentry = dentry;
pending_snapshot->root = root;
@@ -842,6 +852,13 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
ret = PTR_ERR(trans);
goto fail;
}
+ ret = btrfs_record_root_in_trans(trans, BTRFS_I(dir)->root);
+ if (ret) {
+ btrfs_end_transaction(trans);
+ goto fail;
+ }
+ btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
+ qgroup_reserved = 0;

trans->pending_snapshot = pending_snapshot;

@@ -871,7 +888,9 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
if (ret && pending_snapshot->snap)
pending_snapshot->snap->anon_dev = 0;
btrfs_put_root(pending_snapshot->snap);
- btrfs_subvolume_release_metadata(root, &pending_snapshot->block_rsv);
+ btrfs_block_rsv_release(fs_info, block_rsv, (u64)-1, NULL);
+ if (qgroup_reserved)
+ btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
free_pending:
if (pending_snapshot->anon_dev)
free_anon_bdev(pending_snapshot->anon_dev);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 312c7f885386..99ea2c6d3194 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -4162,6 +4162,8 @@ void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes)
BTRFS_QGROUP_RSV_META_PREALLOC);
trace_qgroup_meta_convert(root, num_bytes);
qgroup_convert_meta(fs_info, root->root_key.objectid, num_bytes);
+ if (!sb_rdonly(fs_info->sb))
+ add_root_meta_rsv(root, num_bytes, BTRFS_QGROUP_RSV_META_PERTRANS);
}

/*
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 5b0f1bccc409..aac18f620de4 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -537,13 +537,3 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
}
return ret;
}
-
-void btrfs_subvolume_release_metadata(struct btrfs_root *root,
- struct btrfs_block_rsv *rsv)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- u64 qgroup_to_release;
-
- btrfs_block_rsv_release(fs_info, rsv, (u64)-1, &qgroup_to_release);
- btrfs_qgroup_convert_reserved_meta(root, qgroup_to_release);
-}
diff --git a/fs/btrfs/root-tree.h b/fs/btrfs/root-tree.h
index cbbaca32126e..cce808b44cc0 100644
--- a/fs/btrfs/root-tree.h
+++ b/fs/btrfs/root-tree.h
@@ -6,8 +6,6 @@
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
struct btrfs_block_rsv *rsv,
int nitems, bool use_global_rsv);
-void btrfs_subvolume_release_metadata(struct btrfs_root *root,
- struct btrfs_block_rsv *rsv);
int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
u64 ref_id, u64 dirid, u64 sequence,
const struct fscrypt_str *name);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 28e54168118f..457ec7d02a9a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -715,14 +715,6 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
h->reloc_reserved = reloc_reserved;
}

- /*
- * Now that we have found a transaction to be a part of, convert the
- * qgroup reservation from prealloc to pertrans. A different transaction
- * can't race in and free our pertrans out from under us.
- */
- if (qgroup_reserved)
- btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
-
got_it:
if (!current->journal_info)
current->journal_info = h;
@@ -756,8 +748,15 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
* not just freed.
*/
btrfs_end_transaction(h);
- return ERR_PTR(ret);
+ goto reserve_fail;
}
+ /*
+ * Now that we have found a transaction to be a part of, convert the
+ * qgroup reservation from prealloc to pertrans. A different transaction
+ * can't race in and free our pertrans out from under us.
+ */
+ if (qgroup_reserved)
+ btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);

return h;

diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
index 1a9e705d6500..567f718362c5 100644
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -417,8 +417,8 @@ smb2_close_cached_fid(struct kref *ref)
if (cfid->is_open) {
rc = SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid,
cfid->fid.volatile_fid);
- if (rc != -EBUSY && rc != -EAGAIN)
- atomic_dec(&cfid->tcon->num_remote_opens);
+ if (rc) /* should we retry on -EBUSY or -EAGAIN? */
+ cifs_dbg(VFS, "close cached dir rc %d\n", rc);
}

free_cached_dir(cfid);
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index b3772edca2e6..fd4a823ce3cd 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -681,4 +681,11 @@ static inline bool dma_fence_is_container(struct dma_fence *fence)
return dma_fence_is_array(fence) || dma_fence_is_chain(fence);
}

+#define DMA_FENCE_WARN(f, fmt, args...) \
+ do { \
+ struct dma_fence *__ff = (f); \
+ pr_warn("f %llu#%llu: " fmt, __ff->context, __ff->seqno,\
+ ##args); \
+ } while (0)
+
#endif /* __LINUX_DMA_FENCE_H */
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 2b665c32f5fe..2e09c269bf9d 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -126,7 +126,7 @@ do { \
# define lockdep_softirq_enter() do { } while (0)
# define lockdep_softirq_exit() do { } while (0)
# define lockdep_hrtimer_enter(__hrtimer) false
-# define lockdep_hrtimer_exit(__context) do { } while (0)
+# define lockdep_hrtimer_exit(__context) do { (void)(__context); } while (0)
# define lockdep_posixtimer_enter() do { } while (0)
# define lockdep_posixtimer_exit() do { } while (0)
# define lockdep_irq_work_enter(__work) do { } while (0)
diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h
index ffe48e69b3f3..457879938fc1 100644
--- a/include/linux/u64_stats_sync.h
+++ b/include/linux/u64_stats_sync.h
@@ -135,10 +135,11 @@ static inline void u64_stats_inc(u64_stats_t *p)
p->v++;
}

-static inline void u64_stats_init(struct u64_stats_sync *syncp)
-{
- seqcount_init(&syncp->seq);
-}
+#define u64_stats_init(syncp) \
+ do { \
+ struct u64_stats_sync *__s = (syncp); \
+ seqcount_init(&__s->seq); \
+ } while (0)

static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp)
{
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 61ebe723ee4d..facb7a469efa 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -437,6 +437,10 @@ static inline void in6_ifa_hold(struct inet6_ifaddr *ifp)
refcount_inc(&ifp->refcnt);
}

+static inline bool in6_ifa_hold_safe(struct inet6_ifaddr *ifp)
+{
+ return refcount_inc_not_zero(&ifp->refcnt);
+}

/*
* compute link-local solicited-node multicast address
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index afd40dce40f3..d1b07ddbe677 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -55,7 +55,7 @@ struct unix_sock {
struct mutex iolock, bindlock;
struct sock *peer;
struct list_head link;
- atomic_long_t inflight;
+ unsigned long inflight;
spinlock_t lock;
unsigned long gc_flags;
#define UNIX_GC_CANDIDATE 0
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index aa90adc3b2a4..e4a6831133f8 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -164,6 +164,8 @@ struct bt_voice {
#define BT_ISO_QOS_BIG_UNSET 0xff
#define BT_ISO_QOS_BIS_UNSET 0xff

+#define BT_ISO_SYNC_TIMEOUT 0x07d0 /* 20 secs */
+
struct bt_iso_io_qos {
__u32 interval;
__u16 latency;
@@ -583,6 +585,15 @@ static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk,
return skb;
}

+static inline int bt_copy_from_sockptr(void *dst, size_t dst_size,
+ sockptr_t src, size_t src_size)
+{
+ if (dst_size > src_size)
+ return -EINVAL;
+
+ return copy_from_sockptr(dst, src, dst_size);
+}
+
int bt_to_errno(u16 code);
__u8 bt_status(int err);

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index f346b4efbc30..822f0fad3962 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -360,6 +360,39 @@ static inline bool pskb_inet_may_pull(struct sk_buff *skb)
return pskb_network_may_pull(skb, nhlen);
}

+/* Variant of pskb_inet_may_pull().
+ */
+static inline bool skb_vlan_inet_prepare(struct sk_buff *skb)
+{
+ int nhlen = 0, maclen = ETH_HLEN;
+ __be16 type = skb->protocol;
+
+ /* Essentially this is skb_protocol(skb, true)
+ * And we get MAC len.
+ */
+ if (eth_type_vlan(type))
+ type = __vlan_get_protocol(skb, type, &maclen);
+
+ switch (type) {
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ nhlen = sizeof(struct ipv6hdr);
+ break;
+#endif
+ case htons(ETH_P_IP):
+ nhlen = sizeof(struct iphdr);
+ break;
+ }
+ /* For ETH_P_IPV6/ETH_P_IP we make sure to pull
+ * a base network header in skb->head.
+ */
+ if (!pskb_may_pull(skb, maclen + nhlen))
+ return false;
+
+ skb_set_network_header(skb, maclen);
+ return true;
+}
+
static inline int ip_encap_hlen(struct ip_tunnel_encap *e)
{
const struct ip_tunnel_encap_ops *ops;
diff --git a/io_uring/net.c b/io_uring/net.c
index 5a4001139e28..cf1060fb04f4 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -1274,6 +1274,7 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)

if (req_has_async_data(req)) {
kmsg = req->async_data;
+ kmsg->msg.msg_control_user = sr->msg_control;
} else {
ret = io_sendmsg_copy_hdr(req, &iomsg);
if (ret)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 72e0f5380bf6..92429104bbf8 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -3208,7 +3208,8 @@ enum cpu_mitigations {
};

static enum cpu_mitigations cpu_mitigations __ro_after_init =
- CPU_MITIGATIONS_AUTO;
+ IS_ENABLED(CONFIG_SPECULATION_MITIGATIONS) ? CPU_MITIGATIONS_AUTO :
+ CPU_MITIGATIONS_OFF;

static int __init mitigations_parse_cmdline(char *arg)
{
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index b486504766fb..c2841e595713 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1567,10 +1567,17 @@ static int check_kprobe_address_safe(struct kprobe *p,
jump_label_lock();
preempt_disable();

- /* Ensure it is not in reserved area nor out of text */
- if (!(core_kernel_text((unsigned long) p->addr) ||
- is_module_text_address((unsigned long) p->addr)) ||
- in_gate_area_no_mm((unsigned long) p->addr) ||
+ /* Ensure the address is in a text area, and find a module if exists. */
+ *probed_mod = NULL;
+ if (!core_kernel_text((unsigned long) p->addr)) {
+ *probed_mod = __module_text_address((unsigned long) p->addr);
+ if (!(*probed_mod)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+ /* Ensure it is not in reserved area. */
+ if (in_gate_area_no_mm((unsigned long) p->addr) ||
within_kprobe_blacklist((unsigned long) p->addr) ||
jump_label_text_reserved(p->addr, p->addr) ||
static_call_text_reserved(p->addr, p->addr) ||
@@ -1580,8 +1587,7 @@ static int check_kprobe_address_safe(struct kprobe *p,
goto out;
}

- /* Check if 'p' is probing a module. */
- *probed_mod = __module_text_address((unsigned long) p->addr);
+ /* Get module refcount and reject __init functions for loaded modules. */
if (*probed_mod) {
/*
* We must hold a refcount of the probed module while updating
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index a718067deece..3aae526cc4aa 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -106,6 +106,12 @@ static void s2idle_enter(void)
swait_event_exclusive(s2idle_wait_head,
s2idle_state == S2IDLE_STATE_WAKE);

+ /*
+ * Kick all CPUs to ensure that they resume their timers and restore
+ * consistent system state.
+ */
+ wake_up_all_idle_cpus();
+
cpus_read_unlock();

raw_spin_lock_irq(&s2idle_lock);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 06249b37d801..4461bf43589d 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1536,7 +1536,6 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);

- local_inc(&cpu_buffer->pages_touched);
/*
* Just make sure we have seen our old_write and synchronize
* with any interrupts that come in.
@@ -1573,8 +1572,9 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
*/
local_set(&next_page->page->commit, 0);

- /* Again, either we update tail_page or an interrupt does */
- (void)cmpxchg(&cpu_buffer->tail_page, tail_page, next_page);
+ /* Either we update tail_page or an interrupt does */
+ if (try_cmpxchg(&cpu_buffer->tail_page, &tail_page, next_page))
+ local_inc(&cpu_buffer->pages_touched);
}
}

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 941a394d3911..99f130812286 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1668,6 +1668,7 @@ static int trace_format_open(struct inode *inode, struct file *file)
return 0;
}

+#ifdef CONFIG_PERF_EVENTS
static ssize_t
event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
{
@@ -1682,6 +1683,7 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)

return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
}
+#endif

static ssize_t
event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
@@ -2126,10 +2128,12 @@ static const struct file_operations ftrace_event_format_fops = {
.release = seq_release,
};

+#ifdef CONFIG_PERF_EVENTS
static const struct file_operations ftrace_event_id_fops = {
.read = event_id_read,
.llseek = default_llseek,
};
+#endif

static const struct file_operations ftrace_event_filter_fops = {
.open = tracing_open_file_tr,
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index b95c36765d04..2243cec18ecc 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -3948,7 +3948,7 @@ void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface)

spin_lock_bh(&bat_priv->tt.commit_lock);

- while (true) {
+ while (timeout) {
table_size = batadv_tt_local_table_transmit_size(bat_priv);
if (packet_size_max >= table_size)
break;
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 00e02138003e..efea25eb56ce 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -105,8 +105,10 @@ void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode,
if (hdev->req_status == HCI_REQ_PEND) {
hdev->req_result = result;
hdev->req_status = HCI_REQ_DONE;
- if (skb)
+ if (skb) {
+ kfree_skb(hdev->req_skb);
hdev->req_skb = skb_get(skb);
+ }
wake_up_interruptible(&hdev->req_wait_q);
}
}
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index d6c0633bfe5b..1bc58b324b73 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -2679,6 +2679,14 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
return filter_policy;
}

+static void hci_le_scan_phy_params(struct hci_cp_le_scan_phy_params *cp,
+ u8 type, u16 interval, u16 window)
+{
+ cp->type = type;
+ cp->interval = cpu_to_le16(interval);
+ cp->window = cpu_to_le16(window);
+}
+
static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type,
u16 interval, u16 window,
u8 own_addr_type, u8 filter_policy)
@@ -2686,7 +2694,7 @@ static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type,
struct hci_cp_le_set_ext_scan_params *cp;
struct hci_cp_le_scan_phy_params *phy;
u8 data[sizeof(*cp) + sizeof(*phy) * 2];
- u8 num_phy = 0;
+ u8 num_phy = 0x00;

cp = (void *)data;
phy = (void *)cp->data;
@@ -2696,28 +2704,64 @@ static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type,
cp->own_addr_type = own_addr_type;
cp->filter_policy = filter_policy;

+ /* Check if PA Sync is in progress then select the PHY based on the
+ * hci_conn.iso_qos.
+ */
+ if (hci_dev_test_flag(hdev, HCI_PA_SYNC)) {
+ struct hci_cp_le_add_to_accept_list *sent;
+
+ sent = hci_sent_cmd_data(hdev, HCI_OP_LE_ADD_TO_ACCEPT_LIST);
+ if (sent) {
+ struct hci_conn *conn;
+
+ conn = hci_conn_hash_lookup_ba(hdev, ISO_LINK,
+ &sent->bdaddr);
+ if (conn) {
+ struct bt_iso_qos *qos = &conn->iso_qos;
+
+ if (qos->bcast.in.phy & BT_ISO_PHY_1M ||
+ qos->bcast.in.phy & BT_ISO_PHY_2M) {
+ cp->scanning_phys |= LE_SCAN_PHY_1M;
+ hci_le_scan_phy_params(phy, type,
+ interval,
+ window);
+ num_phy++;
+ phy++;
+ }
+
+ if (qos->bcast.in.phy & BT_ISO_PHY_CODED) {
+ cp->scanning_phys |= LE_SCAN_PHY_CODED;
+ hci_le_scan_phy_params(phy, type,
+ interval * 3,
+ window * 3);
+ num_phy++;
+ phy++;
+ }
+
+ if (num_phy)
+ goto done;
+ }
+ }
+ }
+
if (scan_1m(hdev) || scan_2m(hdev)) {
cp->scanning_phys |= LE_SCAN_PHY_1M;
-
- phy->type = type;
- phy->interval = cpu_to_le16(interval);
- phy->window = cpu_to_le16(window);
-
+ hci_le_scan_phy_params(phy, type, interval, window);
num_phy++;
phy++;
}

if (scan_coded(hdev)) {
cp->scanning_phys |= LE_SCAN_PHY_CODED;
-
- phy->type = type;
- phy->interval = cpu_to_le16(interval);
- phy->window = cpu_to_le16(window);
-
+ hci_le_scan_phy_params(phy, type, interval * 3, window * 3);
num_phy++;
phy++;
}

+done:
+ if (!num_phy)
+ return -EINVAL;
+
return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_SCAN_PARAMS,
sizeof(*cp) + sizeof(*phy) * num_phy,
data, HCI_CMD_TIMEOUT);
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index 0eeec6480139..2f63ea9e62ec 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -764,10 +764,10 @@ static struct bt_iso_qos default_qos = {
.bcode = {0x00},
.options = 0x00,
.skip = 0x0000,
- .sync_timeout = 0x4000,
+ .sync_timeout = BT_ISO_SYNC_TIMEOUT,
.sync_cte_type = 0x00,
.mse = 0x00,
- .timeout = 0x4000,
+ .timeout = BT_ISO_SYNC_TIMEOUT,
},
};

@@ -1301,8 +1301,8 @@ static bool check_ucast_qos(struct bt_iso_qos *qos)

static bool check_bcast_qos(struct bt_iso_qos *qos)
{
- if (qos->bcast.sync_factor == 0x00)
- return false;
+ if (!qos->bcast.sync_factor)
+ qos->bcast.sync_factor = 0x01;

if (qos->bcast.packing > 0x01)
return false;
@@ -1325,6 +1325,9 @@ static bool check_bcast_qos(struct bt_iso_qos *qos)
if (qos->bcast.skip > 0x01f3)
return false;

+ if (!qos->bcast.sync_timeout)
+ qos->bcast.sync_timeout = BT_ISO_SYNC_TIMEOUT;
+
if (qos->bcast.sync_timeout < 0x000a || qos->bcast.sync_timeout > 0x4000)
return false;

@@ -1334,6 +1337,9 @@ static bool check_bcast_qos(struct bt_iso_qos *qos)
if (qos->bcast.mse > 0x1f)
return false;

+ if (!qos->bcast.timeout)
+ qos->bcast.sync_timeout = BT_ISO_SYNC_TIMEOUT;
+
if (qos->bcast.timeout < 0x000a || qos->bcast.timeout > 0x4000)
return false;

diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index ab5a9d42fae7..706d2478ddb3 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -4054,8 +4054,7 @@ static int l2cap_connect_req(struct l2cap_conn *conn,
return -EPROTO;

hci_dev_lock(hdev);
- if (hci_dev_test_flag(hdev, HCI_MGMT) &&
- !test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &hcon->flags))
+ if (hci_dev_test_flag(hdev, HCI_MGMT))
mgmt_device_connected(hdev, hcon, NULL, 0);
hci_dev_unlock(hdev);

diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index c736186aba26..8e4f39b8601c 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -823,7 +823,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
- int len, err = 0;
+ int err = 0;
struct bt_voice voice;
u32 opt;
struct bt_codecs *codecs;
@@ -842,10 +842,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}

- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }

if (opt)
set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
@@ -862,11 +861,10 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,

voice.setting = sco_pi(sk)->setting;

- len = min_t(unsigned int, sizeof(voice), optlen);
- if (copy_from_sockptr(&voice, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&voice, sizeof(voice), optval,
+ optlen);
+ if (err)
break;
- }

/* Explicitly check for these values */
if (voice.setting != BT_VOICE_TRANSPARENT &&
@@ -889,10 +887,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
break;

case BT_PKT_STATUS:
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }

if (opt)
set_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags);
@@ -933,9 +930,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}

- if (copy_from_sockptr(buffer, optval, optlen)) {
+ err = bt_copy_from_sockptr(buffer, optlen, optval, optlen);
+ if (err) {
hci_dev_put(hdev);
- err = -EFAULT;
break;
}

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index b150c9929b12..14365b20f1c5 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -966,6 +966,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;

tmp.name[sizeof(tmp.name)-1] = 0;

@@ -1266,6 +1268,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;

tmp.name[sizeof(tmp.name)-1] = 0;

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 487670759578..fe89a056eb06 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1118,6 +1118,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;

tmp.name[sizeof(tmp.name)-1] = 0;

@@ -1504,6 +1506,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;

tmp.name[sizeof(tmp.name)-1] = 0;

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 3bad9aa066db..e1e30c09a175 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -926,13 +926,11 @@ void ip_rt_send_redirect(struct sk_buff *skb)
icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
peer->rate_last = jiffies;
++peer->n_redirects;
-#ifdef CONFIG_IP_ROUTE_VERBOSE
- if (log_martians &&
+ if (IS_ENABLED(CONFIG_IP_ROUTE_VERBOSE) && log_martians &&
peer->n_redirects == ip_rt_redirect_number)
net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
&ip_hdr(skb)->saddr, inet_iif(skb),
&ip_hdr(skb)->daddr, &gw);
-#endif
}
out_put_peer:
inet_putpeer(peer);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 6f57cbddeee6..d1806eee1687 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2058,9 +2058,10 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
if (ipv6_addr_equal(&ifp->addr, addr)) {
if (!dev || ifp->idev->dev == dev ||
!(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
- result = ifp;
- in6_ifa_hold(ifp);
- break;
+ if (in6_ifa_hold_safe(ifp)) {
+ result = ifp;
+ break;
+ }
}
}
}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 54294f6a8ec5..8184076a3924 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1375,7 +1375,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
struct nl_info *info, struct netlink_ext_ack *extack)
{
struct fib6_table *table = rt->fib6_table;
- struct fib6_node *fn, *pn = NULL;
+ struct fib6_node *fn;
+#ifdef CONFIG_IPV6_SUBTREES
+ struct fib6_node *pn = NULL;
+#endif
int err = -ENOMEM;
int allow_create = 1;
int replace_required = 0;
@@ -1399,9 +1402,9 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
goto out;
}

+#ifdef CONFIG_IPV6_SUBTREES
pn = fn;

-#ifdef CONFIG_IPV6_SUBTREES
if (rt->fib6_src.plen) {
struct fib6_node *sn;

diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 636b360311c5..131f7bb2110d 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1135,6 +1135,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;

tmp.name[sizeof(tmp.name)-1] = 0;

@@ -1513,6 +1515,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;

tmp.name[sizeof(tmp.name)-1] = 0;

diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 3019a4406ca4..74b63cdb5992 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1380,8 +1380,9 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
if (ct_info.timeout[0]) {
if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
ct_info.timeout))
- pr_info_ratelimited("Failed to associated timeout "
- "policy `%s'\n", ct_info.timeout);
+ OVS_NLERR(log,
+ "Failed to associated timeout policy '%s'",
+ ct_info.timeout);
else
ct_info.nf_ct_timeout = rcu_dereference(
nf_ct_timeout_find(ct_info.ct)->timeout);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 510b1d6758db..918724844231 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -992,11 +992,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
sk->sk_write_space = unix_write_space;
sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
sk->sk_destruct = unix_sock_destructor;
- u = unix_sk(sk);
+ u = unix_sk(sk);
+ u->inflight = 0;
u->path.dentry = NULL;
u->path.mnt = NULL;
spin_lock_init(&u->lock);
- atomic_long_set(&u->inflight, 0);
INIT_LIST_HEAD(&u->link);
mutex_init(&u->iolock); /* single task reading lock */
mutex_init(&u->bindlock); /* single task binding lock */
@@ -2589,7 +2589,9 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
}
} else if (!(flags & MSG_PEEK)) {
skb_unlink(skb, &sk->sk_receive_queue);
- consume_skb(skb);
+ WRITE_ONCE(u->oob_skb, NULL);
+ if (!WARN_ON_ONCE(skb_unref(skb)))
+ kfree_skb(skb);
skb = skb_peek(&sk->sk_receive_queue);
}
}
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 027c86e804f8..8734c0c1fc19 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -166,17 +166,18 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *),

static void dec_inflight(struct unix_sock *usk)
{
- atomic_long_dec(&usk->inflight);
+ usk->inflight--;
}

static void inc_inflight(struct unix_sock *usk)
{
- atomic_long_inc(&usk->inflight);
+ usk->inflight++;
}

static void inc_inflight_move_tail(struct unix_sock *u)
{
- atomic_long_inc(&u->inflight);
+ u->inflight++;
+
/* If this still might be part of a cycle, move it to the end
* of the list, so that it's checked even if it was already
* passed over
@@ -234,20 +235,34 @@ void unix_gc(void)
* receive queues. Other, non candidate sockets _can_ be
* added to queue, so we must make sure only to touch
* candidates.
+ *
+ * Embryos, though never candidates themselves, affect which
+ * candidates are reachable by the garbage collector. Before
+ * being added to a listener's queue, an embryo may already
+ * receive data carrying SCM_RIGHTS, potentially making the
+ * passed socket a candidate that is not yet reachable by the
+ * collector. It becomes reachable once the embryo is
+ * enqueued. Therefore, we must ensure that no SCM-laden
+ * embryo appears in a (candidate) listener's queue between
+ * consecutive scan_children() calls.
*/
list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
+ struct sock *sk = &u->sk;
long total_refs;
- long inflight_refs;

- total_refs = file_count(u->sk.sk_socket->file);
- inflight_refs = atomic_long_read(&u->inflight);
+ total_refs = file_count(sk->sk_socket->file);

- BUG_ON(inflight_refs < 1);
- BUG_ON(total_refs < inflight_refs);
- if (total_refs == inflight_refs) {
+ BUG_ON(!u->inflight);
+ BUG_ON(total_refs < u->inflight);
+ if (total_refs == u->inflight) {
list_move_tail(&u->link, &gc_candidates);
__set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
__set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
+
+ if (sk->sk_state == TCP_LISTEN) {
+ unix_state_lock(sk);
+ unix_state_unlock(sk);
+ }
}
}

@@ -271,7 +286,7 @@ void unix_gc(void)
/* Move cursor to after the current position. */
list_move(&cursor, &u->link);

- if (atomic_long_read(&u->inflight) > 0) {
+ if (u->inflight) {
list_move_tail(&u->link, &not_cycle_list);
__clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
scan_children(&u->sk, inc_inflight_move_tail, NULL);
diff --git a/net/unix/scm.c b/net/unix/scm.c
index 822ce0d0d791..e92f2fad6410 100644
--- a/net/unix/scm.c
+++ b/net/unix/scm.c
@@ -53,12 +53,13 @@ void unix_inflight(struct user_struct *user, struct file *fp)
if (s) {
struct unix_sock *u = unix_sk(s);

- if (atomic_long_inc_return(&u->inflight) == 1) {
+ if (!u->inflight) {
BUG_ON(!list_empty(&u->link));
list_add_tail(&u->link, &gc_inflight_list);
} else {
BUG_ON(list_empty(&u->link));
}
+ u->inflight++;
/* Paired with READ_ONCE() in wait_for_unix_gc() */
WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
}
@@ -75,10 +76,11 @@ void unix_notinflight(struct user_struct *user, struct file *fp)
if (s) {
struct unix_sock *u = unix_sk(s);

- BUG_ON(!atomic_long_read(&u->inflight));
+ BUG_ON(!u->inflight);
BUG_ON(list_empty(&u->link));

- if (atomic_long_dec_and_test(&u->inflight))
+ u->inflight--;
+ if (!u->inflight)
list_del_init(&u->link);
/* Paired with READ_ONCE() in wait_for_unix_gc() */
WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 2c3ba42bfcdc..93c802cfb9c6 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -1332,6 +1332,8 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
struct xsk_queue **q;
int entries;

+ if (optlen < sizeof(entries))
+ return -EINVAL;
if (copy_from_sockptr(&entries, optval, sizeof(entries)))
return -EFAULT;

diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 7647c74adb26..d203d314b7b2 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -374,15 +374,15 @@ do_transfer()
TEST_COUNT=$((TEST_COUNT+1))

if [ "$rcvbuf" -gt 0 ]; then
- extra_args="$extra_args -R $rcvbuf"
+ extra_args+=" -R $rcvbuf"
fi

if [ "$sndbuf" -gt 0 ]; then
- extra_args="$extra_args -S $sndbuf"
+ extra_args+=" -S $sndbuf"
fi

if [ -n "$testmode" ]; then
- extra_args="$extra_args -m $testmode"
+ extra_args+=" -m $testmode"
fi

if [ -n "$extra_args" ] && $options_log; then
@@ -503,6 +503,7 @@ do_transfer()
check_transfer $cin $sout "file received by server"
rets=$?

+ local extra=""
local stat_synrx_now_l
local stat_ackrx_now_l
local stat_cookietx_now
@@ -538,7 +539,7 @@ do_transfer()
"${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2
rets=1
else
- printf "[ Note ] fallback due to TCP OoO"
+ extra+=" [ Note ] fallback due to TCP OoO"
fi
fi

@@ -561,13 +562,6 @@ do_transfer()
fi
fi

- if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then
- printf "[ OK ]"
- mptcp_lib_result_pass "${TEST_GROUP}: ${result_msg}"
- else
- mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}"
- fi
-
if [ ${stat_ooo_now} -eq 0 ] && [ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]; then
mptcp_lib_pr_fail "unexpected fallback to TCP"
rets=1
@@ -575,30 +569,39 @@ do_transfer()

if [ $cookies -eq 2 ];then
if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then
- printf " WARN: CookieSent: did not advance"
+ extra+=" WARN: CookieSent: did not advance"
fi
if [ $stat_cookierx_last -ge $stat_cookierx_now ] ;then
- printf " WARN: CookieRecv: did not advance"
+ extra+=" WARN: CookieRecv: did not advance"
fi
else
if [ $stat_cookietx_last -ne $stat_cookietx_now ] ;then
- printf " WARN: CookieSent: changed"
+ extra+=" WARN: CookieSent: changed"
fi
if [ $stat_cookierx_last -ne $stat_cookierx_now ] ;then
- printf " WARN: CookieRecv: changed"
+ extra+=" WARN: CookieRecv: changed"
fi
fi

if [ ${stat_synrx_now_l} -gt ${expect_synrx} ]; then
- printf " WARN: SYNRX: expect %d, got %d (probably retransmissions)" \
- "${expect_synrx}" "${stat_synrx_now_l}"
+ extra+=" WARN: SYNRX: expect ${expect_synrx},"
+ extra+=" got ${stat_synrx_now_l} (probably retransmissions)"
fi
if [ ${stat_ackrx_now_l} -gt ${expect_ackrx} ]; then
- printf " WARN: ACKRX: expect %d, got %d (probably retransmissions)" \
- "${expect_ackrx}" "${stat_ackrx_now_l}"
+ extra+=" WARN: ACKRX: expect ${expect_ackrx},"
+ extra+=" got ${stat_ackrx_now_l} (probably retransmissions)"
+ fi
+
+ if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then
+ printf "[ OK ]%s\n" "${extra}"
+ mptcp_lib_result_pass "${TEST_GROUP}: ${result_msg}"
+ else
+ if [ -n "${extra}" ]; then
+ printf "%s\n" "${extra:1}"
+ fi
+ mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}"
fi

- echo
cat "$capout"
[ $retc -eq 0 ] && [ $rets -eq 0 ]
}
@@ -924,8 +927,8 @@ mptcp_lib_result_code "${ret}" "ping tests"
stop_if_error "Could not even run ping tests"

[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss delay ${tc_delay}ms
-echo -n "INFO: Using loss of $tc_loss "
-test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms "
+tc_info="loss of $tc_loss "
+test "$tc_delay" -gt 0 && tc_info+="delay $tc_delay ms "

reorder_delay=$((tc_delay / 4))

@@ -936,17 +939,17 @@ if [ -z "${tc_reorder}" ]; then

if [ $reorder_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then
tc_reorder="reorder ${reorder1}% ${reorder2}%"
- echo -n "$tc_reorder with delay ${reorder_delay}ms "
+ tc_info+="$tc_reorder with delay ${reorder_delay}ms "
fi
elif [ "$tc_reorder" = "0" ];then
tc_reorder=""
elif [ "$reorder_delay" -gt 0 ];then
# reordering requires some delay
tc_reorder="reorder $tc_reorder"
- echo -n "$tc_reorder with delay ${reorder_delay}ms "
+ tc_info+="$tc_reorder with delay ${reorder_delay}ms "
fi

-echo "on ns3eth4"
+echo "INFO: Using ${tc_info}on ns3eth4"

tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${reorder_delay}ms $tc_reorder

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 1e1c3d713360..371583009a66 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -822,18 +822,18 @@ pm_nl_check_endpoint()
line="${line% }"
# the dump order is: address id flags port dev
[ -n "$addr" ] && expected_line="$addr"
- expected_line="$expected_line $id"
- [ -n "$_flags" ] && expected_line="$expected_line ${_flags//","/" "}"
- [ -n "$dev" ] && expected_line="$expected_line $dev"
- [ -n "$port" ] && expected_line="$expected_line $port"
+ expected_line+=" $id"
+ [ -n "$_flags" ] && expected_line+=" ${_flags//","/" "}"
+ [ -n "$dev" ] && expected_line+=" $dev"
+ [ -n "$port" ] && expected_line+=" $port"
else
line=$(ip netns exec $ns ./pm_nl_ctl get $_id)
# the dump order is: id flags dev address port
expected_line="$id"
- [ -n "$flags" ] && expected_line="$expected_line $flags"
- [ -n "$dev" ] && expected_line="$expected_line $dev"
- [ -n "$addr" ] && expected_line="$expected_line $addr"
- [ -n "$_port" ] && expected_line="$expected_line $_port"
+ [ -n "$flags" ] && expected_line+=" $flags"
+ [ -n "$dev" ] && expected_line+=" $dev"
+ [ -n "$addr" ] && expected_line+=" $addr"
+ [ -n "$_port" ] && expected_line+=" $_port"
fi
if [ "$line" = "$expected_line" ]; then
print_ok
@@ -1256,7 +1256,7 @@ chk_csum_nr()
print_check "sum"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtDataCsumErr")
if [ "$count" != "$csum_ns1" ]; then
- extra_msg="$extra_msg ns1=$count"
+ extra_msg+=" ns1=$count"
fi
if [ -z "$count" ]; then
print_skip
@@ -1269,7 +1269,7 @@ chk_csum_nr()
print_check "csum"
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtDataCsumErr")
if [ "$count" != "$csum_ns2" ]; then
- extra_msg="$extra_msg ns2=$count"
+ extra_msg+=" ns2=$count"
fi
if [ -z "$count" ]; then
print_skip
@@ -1313,7 +1313,7 @@ chk_fail_nr()
print_check "ftx"
count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFailTx")
if [ "$count" != "$fail_tx" ]; then
- extra_msg="$extra_msg,tx=$count"
+ extra_msg+=",tx=$count"
fi
if [ -z "$count" ]; then
print_skip
@@ -1327,7 +1327,7 @@ chk_fail_nr()
print_check "failrx"
count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFailRx")
if [ "$count" != "$fail_rx" ]; then
- extra_msg="$extra_msg,rx=$count"
+ extra_msg+=",rx=$count"
fi
if [ -z "$count" ]; then
print_skip
@@ -1362,7 +1362,7 @@ chk_fclose_nr()
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$fclose_tx" ]; then
- extra_msg="$extra_msg,tx=$count"
+ extra_msg+=",tx=$count"
fail_test "got $count MP_FASTCLOSE[s] TX expected $fclose_tx"
else
print_ok
@@ -1373,7 +1373,7 @@ chk_fclose_nr()
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$fclose_rx" ]; then
- extra_msg="$extra_msg,rx=$count"
+ extra_msg+=",rx=$count"
fail_test "got $count MP_FASTCLOSE[s] RX expected $fclose_rx"
else
print_ok
@@ -1742,7 +1742,7 @@ chk_rm_nr()
count=$((count + cnt))
if [ "$count" != "$rm_subflow_nr" ]; then
suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]"
- extra_msg="$extra_msg simult"
+ extra_msg+=" simult"
fi
if [ $count -ge "$rm_subflow_nr" ] && \
[ "$count" -le "$((rm_subflow_nr *2 ))" ]; then
diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c
index 8a17c0e8d82b..9a42403eaff7 100644
--- a/tools/testing/selftests/timers/posix_timers.c
+++ b/tools/testing/selftests/timers/posix_timers.c
@@ -66,7 +66,7 @@ static int check_diff(struct timeval start, struct timeval end)
diff = end.tv_usec - start.tv_usec;
diff += (end.tv_sec - start.tv_sec) * USECS_PER_SEC;

- if (abs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) {
+ if (llabs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) {
printf("Diff too high: %lld..", diff);
return -1;
}