Re: Linux 6.18.14

From: Greg Kroah-Hartman

Date: Thu Feb 26 2026 - 18:16:43 EST


diff --git a/Documentation/PCI/endpoint/pci-vntb-howto.rst b/Documentation/PCI/endpoint/pci-vntb-howto.rst
index 9a7a2f0a6849..3679f5c30254 100644
--- a/Documentation/PCI/endpoint/pci-vntb-howto.rst
+++ b/Documentation/PCI/endpoint/pci-vntb-howto.rst
@@ -52,14 +52,14 @@ pci-epf-vntb device, the following commands can be used::
# cd /sys/kernel/config/pci_ep/
# mkdir functions/pci_epf_vntb/func1

-The "mkdir func1" above creates the pci-epf-ntb function device that will
+The "mkdir func1" above creates the pci-epf-vntb function device that will
be probed by pci_epf_vntb driver.

The PCI endpoint framework populates the directory with the following
configurable fields::

- # ls functions/pci_epf_ntb/func1
- baseclass_code deviceid msi_interrupts pci-epf-ntb.0
+ # ls functions/pci_epf_vntb/func1
+ baseclass_code deviceid msi_interrupts pci-epf-vntb.0
progif_code secondary subsys_id vendorid
cache_line_size interrupt_pin msix_interrupts primary
revid subclass_code subsys_vendor_id
@@ -111,13 +111,13 @@ A sample configuration for virtual NTB driver for virtual PCI bus::
# echo 0x080A > functions/pci_epf_vntb/func1/pci_epf_vntb.0/vntb_pid
# echo 0x10 > functions/pci_epf_vntb/func1/pci_epf_vntb.0/vbus_number

-Binding pci-epf-ntb Device to EP Controller
+Binding pci-epf-vntb Device to EP Controller
--------------------------------------------

NTB function device should be attached to PCI endpoint controllers
connected to the host.

- # ln -s controllers/5f010000.pcie_ep functions/pci-epf-ntb/func1/primary
+ # ln -s controllers/5f010000.pcie_ep functions/pci_epf_vntb/func1/primary

Once the above step is completed, the PCI endpoint controllers are ready to
establish a link with the host.
@@ -139,7 +139,7 @@ lspci Output at Host side
-------------------------

Note that the devices listed here correspond to the values populated in
-"Creating pci-epf-ntb Device" section above::
+"Creating pci-epf-vntb Device" section above::

# lspci
00:00.0 PCI bridge: Freescale Semiconductor Inc Device 0000 (rev 01)
@@ -152,7 +152,7 @@ lspci Output at EP Side / Virtual PCI bus
-----------------------------------------

Note that the devices listed here correspond to the values populated in
-"Creating pci-epf-ntb Device" section above::
+"Creating pci-epf-vntb Device" section above::

# lspci
10:00.0 Unassigned class [ffff]: Dawicontrol Computersysteme GmbH Device 1234 (rev ff)
diff --git a/Documentation/devicetree/bindings/media/qcom,qcs8300-camss.yaml b/Documentation/devicetree/bindings/media/qcom,qcs8300-camss.yaml
index 80a4540a22dc..e5f170aa4d9e 100644
--- a/Documentation/devicetree/bindings/media/qcom,qcs8300-camss.yaml
+++ b/Documentation/devicetree/bindings/media/qcom,qcs8300-camss.yaml
@@ -120,6 +120,14 @@ properties:
items:
- const: top

+ vdda-phy-supply:
+ description:
+ Phandle to a 0.88V regulator supply to CSI PHYs.
+
+ vdda-pll-supply:
+ description:
+ Phandle to 1.2V regulator supply to CSI PHYs pll block.
+
ports:
$ref: /schemas/graph.yaml#/properties/ports

@@ -160,6 +168,8 @@ required:
- power-domains
- power-domain-names
- ports
+ - vdda-phy-supply
+ - vdda-pll-supply

additionalProperties: false

@@ -328,6 +338,9 @@ examples:
power-domains = <&camcc CAM_CC_TITAN_TOP_GDSC>;
power-domain-names = "top";

+ vdda-phy-supply = <&vreg_l4a_0p88>;
+ vdda-pll-supply = <&vreg_l1c_1p2>;
+
ports {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/Documentation/devicetree/bindings/phy/qcom,edp-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,edp-phy.yaml
index eb97181cbb95..bfc4d75f50ff 100644
--- a/Documentation/devicetree/bindings/phy/qcom,edp-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/qcom,edp-phy.yaml
@@ -37,12 +37,15 @@ properties:
- description: PLL register block

clocks:
- maxItems: 2
+ minItems: 2
+ maxItems: 3

clock-names:
+ minItems: 2
items:
- const: aux
- const: cfg_ahb
+ - const: ref

"#clock-cells":
const: 1
@@ -64,6 +67,29 @@ required:
- "#clock-cells"
- "#phy-cells"

+allOf:
+ - if:
+ properties:
+ compatible:
+ enum:
+ - qcom,x1e80100-dp-phy
+ then:
+ properties:
+ clocks:
+ minItems: 3
+ maxItems: 3
+ clock-names:
+ minItems: 3
+ maxItems: 3
+ else:
+ properties:
+ clocks:
+ minItems: 2
+ maxItems: 2
+ clock-names:
+ minItems: 2
+ maxItems: 2
+
additionalProperties: false

examples:
diff --git a/Documentation/devicetree/bindings/sound/asahi-kasei,ak4458.yaml b/Documentation/devicetree/bindings/sound/asahi-kasei,ak4458.yaml
index 1fdbeecc5eff..3a3313ea0890 100644
--- a/Documentation/devicetree/bindings/sound/asahi-kasei,ak4458.yaml
+++ b/Documentation/devicetree/bindings/sound/asahi-kasei,ak4458.yaml
@@ -21,10 +21,10 @@ properties:
reg:
maxItems: 1

- avdd-supply:
+ AVDD-supply:
description: Analog power supply

- dvdd-supply:
+ DVDD-supply:
description: Digital power supply

reset-gpios:
@@ -60,7 +60,7 @@ allOf:
properties:
dsd-path: false

-additionalProperties: false
+unevaluatedProperties: false

examples:
- |
diff --git a/Documentation/devicetree/bindings/sound/asahi-kasei,ak5558.yaml b/Documentation/devicetree/bindings/sound/asahi-kasei,ak5558.yaml
index d3d494ae8abf..dc8f85f266bf 100644
--- a/Documentation/devicetree/bindings/sound/asahi-kasei,ak5558.yaml
+++ b/Documentation/devicetree/bindings/sound/asahi-kasei,ak5558.yaml
@@ -19,10 +19,10 @@ properties:
reg:
maxItems: 1

- avdd-supply:
+ AVDD-supply:
description: A 1.8V supply that powers up the AVDD pin.

- dvdd-supply:
+ DVDD-supply:
description: A 1.2V supply that powers up the DVDD pin.

reset-gpios:
diff --git a/Documentation/driver-api/dpll.rst b/Documentation/driver-api/dpll.rst
index be1fc643b645..83118c728ed9 100644
--- a/Documentation/driver-api/dpll.rst
+++ b/Documentation/driver-api/dpll.rst
@@ -198,26 +198,28 @@ be requested with the same attribute with ``DPLL_CMD_DEVICE_SET`` command.
================================== ======================================

Device may also provide ability to adjust a signal phase on a pin.
-If pin phase adjustment is supported, minimal and maximal values that pin
-handle shall be provide to the user on ``DPLL_CMD_PIN_GET`` respond
-with ``DPLL_A_PIN_PHASE_ADJUST_MIN`` and ``DPLL_A_PIN_PHASE_ADJUST_MAX``
+If pin phase adjustment is supported, minimal and maximal values and
+granularity that pin handle shall be provided to the user on
+``DPLL_CMD_PIN_GET`` respond with ``DPLL_A_PIN_PHASE_ADJUST_MIN``,
+``DPLL_A_PIN_PHASE_ADJUST_MAX`` and ``DPLL_A_PIN_PHASE_ADJUST_GRAN``
attributes. Configured phase adjust value is provided with
``DPLL_A_PIN_PHASE_ADJUST`` attribute of a pin, and value change can be
requested with the same attribute with ``DPLL_CMD_PIN_SET`` command.

- =============================== ======================================
- ``DPLL_A_PIN_ID`` configured pin id
- ``DPLL_A_PIN_PHASE_ADJUST_MIN`` attr minimum value of phase adjustment
- ``DPLL_A_PIN_PHASE_ADJUST_MAX`` attr maximum value of phase adjustment
- ``DPLL_A_PIN_PHASE_ADJUST`` attr configured value of phase
- adjustment on parent dpll device
- ``DPLL_A_PIN_PARENT_DEVICE`` nested attribute for requesting
- configuration on given parent dpll
- device
- ``DPLL_A_PIN_PARENT_ID`` parent dpll device id
- ``DPLL_A_PIN_PHASE_OFFSET`` attr measured phase difference
- between a pin and parent dpll device
- =============================== ======================================
+ ================================ ==========================================
+ ``DPLL_A_PIN_ID`` configured pin id
+ ``DPLL_A_PIN_PHASE_ADJUST_GRAN`` attr granularity of phase adjustment value
+ ``DPLL_A_PIN_PHASE_ADJUST_MIN`` attr minimum value of phase adjustment
+ ``DPLL_A_PIN_PHASE_ADJUST_MAX`` attr maximum value of phase adjustment
+ ``DPLL_A_PIN_PHASE_ADJUST`` attr configured value of phase
+ adjustment on parent dpll device
+ ``DPLL_A_PIN_PARENT_DEVICE`` nested attribute for requesting
+ configuration on given parent dpll
+ device
+ ``DPLL_A_PIN_PARENT_ID`` parent dpll device id
+ ``DPLL_A_PIN_PHASE_OFFSET`` attr measured phase difference
+ between a pin and parent dpll device
+ ================================ ==========================================

All phase related values are provided in pico seconds, which represents
time difference between signals phase. The negative value means that
@@ -384,6 +386,8 @@ according to attribute purpose.
frequencies
``DPLL_A_PIN_ANY_FREQUENCY_MIN`` attr minimum value of frequency
``DPLL_A_PIN_ANY_FREQUENCY_MAX`` attr maximum value of frequency
+ ``DPLL_A_PIN_PHASE_ADJUST_GRAN`` attr granularity of phase
+ adjustment value
``DPLL_A_PIN_PHASE_ADJUST_MIN`` attr minimum value of phase
adjustment
``DPLL_A_PIN_PHASE_ADJUST_MAX`` attr maximum value of phase
diff --git a/Documentation/netlink/specs/dpll.yaml b/Documentation/netlink/specs/dpll.yaml
index 80728f6f9bc8..78d0724d7e12 100644
--- a/Documentation/netlink/specs/dpll.yaml
+++ b/Documentation/netlink/specs/dpll.yaml
@@ -440,6 +440,12 @@ attribute-sets:
doc: |
Capable pin provides list of pins that can be bound to create a
reference-sync pin pair.
+ -
+ name: phase-adjust-gran
+ type: u32
+ doc: |
+ Granularity of phase adjustment, in picoseconds. The value of
+ phase adjustment must be a multiple of this granularity.

-
name: pin-parent-device
@@ -616,6 +622,7 @@ operations:
- capabilities
- parent-device
- parent-pin
+ - phase-adjust-gran
- phase-adjust-min
- phase-adjust-max
- phase-adjust
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index a06cb99d66dc..7a637e87005f 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -3195,12 +3195,13 @@ enhanced_dad - BOOLEAN
===========

ratelimit - INTEGER
- Limit the maximal rates for sending ICMPv6 messages.
+ Limit the maximal rates for sending ICMPv6 messages to a particular
+ peer.

0 to disable any limiting,
- otherwise the minimal space between responses in milliseconds.
+ otherwise the space between responses in milliseconds.

- Default: 1000
+ Default: 100

ratemask - list of comma separated ranges
For ICMPv6 message types matching the ranges in the ratemask, limit
diff --git a/Documentation/trace/events-pci.rst b/Documentation/trace/events-pci.rst
new file mode 100644
index 000000000000..03ff4ad30ddf
--- /dev/null
+++ b/Documentation/trace/events-pci.rst
@@ -0,0 +1,74 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+Subsystem Trace Points: PCI
+===========================
+
+Overview
+========
+The PCI tracing system provides tracepoints to monitor critical hardware events
+that can impact system performance and reliability. These events normally show
+up here:
+
+ /sys/kernel/tracing/events/pci
+
+Cf. include/trace/events/pci.h for the events definitions.
+
+Available Tracepoints
+=====================
+
+pci_hp_event
+------------
+
+Monitors PCI hotplug events including card insertion/removal and link
+state changes.
+::
+
+ pci_hp_event "%s slot:%s, event:%s\n"
+
+**Event Types**:
+
+* ``LINK_UP`` - PCIe link established
+* ``LINK_DOWN`` - PCIe link lost
+* ``CARD_PRESENT`` - Card detected in slot
+* ``CARD_NOT_PRESENT`` - Card removed from slot
+
+**Example Usage**::
+
+ # Enable the tracepoint
+ echo 1 > /sys/kernel/debug/tracing/events/pci/pci_hp_event/enable
+
+ # Monitor events (the following output is generated when a device is hotplugged)
+ cat /sys/kernel/debug/tracing/trace_pipe
+ irq/51-pciehp-88 [001] ..... 1311.177459: pci_hp_event: 0000:00:02.0 slot:10, event:CARD_PRESENT
+
+ irq/51-pciehp-88 [001] ..... 1311.177566: pci_hp_event: 0000:00:02.0 slot:10, event:LINK_UP
+
+pcie_link_event
+---------------
+
+Monitors PCIe link speed changes and provides detailed link status information.
+::
+
+ pcie_link_event "%s type:%d, reason:%d, cur_bus_speed:%d, max_bus_speed:%d, width:%u, flit_mode:%u, status:%s\n"
+
+**Parameters**:
+
+* ``type`` - PCIe device type (4=Root Port, etc.)
+* ``reason`` - Reason for link change:
+
+ - ``0`` - Link retrain
+ - ``1`` - Bus enumeration
+ - ``2`` - Bandwidth notification enable
+ - ``3`` - Bandwidth notification IRQ
+ - ``4`` - Hotplug event
+
+
+**Example Usage**::
+
+ # Enable the tracepoint
+ echo 1 > /sys/kernel/debug/tracing/events/pci/pcie_link_event/enable
+
+ # Monitor events (the following output is generated when a device is hotplugged)
+ cat /sys/kernel/debug/tracing/trace_pipe
+ irq/51-pciehp-88 [001] ..... 381.545386: pcie_link_event: 0000:00:02.0 type:4, reason:4, cur_bus_speed:20, max_bus_speed:23, width:1, flit_mode:0, status:DLLLA
diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst
index b4a429dc4f7a..0a40bfabcf19 100644
--- a/Documentation/trace/index.rst
+++ b/Documentation/trace/index.rst
@@ -54,6 +54,7 @@ applications.
events-power
events-nmi
events-msr
+ events-pci
boottime-trace
histogram
histogram-design
diff --git a/Makefile b/Makefile
index c4b22ec26278..d166d0695099 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 6
PATCHLEVEL = 18
-SUBLEVEL = 13
+SUBLEVEL = 14
EXTRAVERSION =
NAME = Baby Opossum Posse

@@ -1440,6 +1440,15 @@ ifneq ($(wildcard $(resolve_btfids_O)),)
$(Q)$(MAKE) -sC $(srctree)/tools/bpf/resolve_btfids O=$(resolve_btfids_O) clean
endif

+PHONY += objtool_clean
+
+objtool_O = $(abspath $(objtree))/tools/objtool
+
+objtool_clean:
+ifneq ($(wildcard $(objtool_O)),)
+ $(Q)$(MAKE) -sC $(abs_srctree)/tools/objtool O=$(objtool_O) srctree=$(abs_srctree) clean
+endif
+
tools/: FORCE
$(Q)mkdir -p $(objtree)/tools
$(Q)$(MAKE) O=$(abspath $(objtree)) subdir=tools -C $(srctree)/tools/
@@ -1603,7 +1612,7 @@ vmlinuxclean:
$(Q)$(CONFIG_SHELL) $(srctree)/scripts/link-vmlinux.sh clean
$(Q)$(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) clean)

-clean: archclean vmlinuxclean resolve_btfids_clean
+clean: archclean vmlinuxclean resolve_btfids_clean objtool_clean

# mrproper - Delete all generated files, including .config
#
diff --git a/arch/arm/boot/dts/allwinner/sun5i-a13-utoo-p66.dts b/arch/arm/boot/dts/allwinner/sun5i-a13-utoo-p66.dts
index be486d28d04f..428cab5a0e90 100644
--- a/arch/arm/boot/dts/allwinner/sun5i-a13-utoo-p66.dts
+++ b/arch/arm/boot/dts/allwinner/sun5i-a13-utoo-p66.dts
@@ -102,6 +102,7 @@ &touchscreen {
/* The P66 uses a different EINT then the reference design */
interrupts = <6 9 IRQ_TYPE_EDGE_FALLING>; /* EINT9 (PG9) */
/* The icn8318 binding expects wake-gpios instead of power-gpios */
+ /delete-property/ power-gpios;
wake-gpios = <&pio 1 3 GPIO_ACTIVE_HIGH>; /* PB3 */
touchscreen-size-x = <800>;
touchscreen-size-y = <480>;
diff --git a/arch/arm/boot/dts/nxp/lpc/lpc32xx.dtsi b/arch/arm/boot/dts/nxp/lpc/lpc32xx.dtsi
index 2236901a0031..8e9ed93da129 100644
--- a/arch/arm/boot/dts/nxp/lpc/lpc32xx.dtsi
+++ b/arch/arm/boot/dts/nxp/lpc/lpc32xx.dtsi
@@ -302,6 +302,7 @@ i2c2: i2c@400a8000 {
mpwm: pwm@400e8000 {
compatible = "nxp,lpc3220-motor-pwm";
reg = <0x400e8000 0x78>;
+ clocks = <&clk LPC32XX_CLK_MCPWM>;
#pwm-cells = <3>;
status = "disabled";
};
diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c
index e38a30477f3d..566c40f0f7c7 100644
--- a/arch/arm/kernel/vdso.c
+++ b/arch/arm/kernel/vdso.c
@@ -161,6 +161,7 @@ static void __init patch_vdso(void *ehdr)
vdso_nullpatch_one(&einfo, "__vdso_gettimeofday");
vdso_nullpatch_one(&einfo, "__vdso_clock_gettime");
vdso_nullpatch_one(&einfo, "__vdso_clock_gettime64");
+ vdso_nullpatch_one(&einfo, "__vdso_clock_getres");
}
}

diff --git a/arch/arm64/boot/dts/amlogic/amlogic-c3.dtsi b/arch/arm64/boot/dts/amlogic/amlogic-c3.dtsi
index 07aaaf71ea9a..f226df3ce153 100644
--- a/arch/arm64/boot/dts/amlogic/amlogic-c3.dtsi
+++ b/arch/arm64/boot/dts/amlogic/amlogic-c3.dtsi
@@ -969,6 +969,10 @@ sdio: mmc@88000 {
no-sd;
resets = <&reset RESET_SD_EMMC_A>;
status = "disabled";
+
+ assigned-clocks = <&clkc_periphs CLKID_SD_EMMC_A>;
+ assigned-clock-rates = <24000000>;
+
};

sd: mmc@8a000 {
@@ -984,6 +988,9 @@ sd: mmc@8a000 {
no-sdio;
resets = <&reset RESET_SD_EMMC_B>;
status = "disabled";
+
+ assigned-clocks = <&clkc_periphs CLKID_SD_EMMC_B>;
+ assigned-clock-rates = <24000000>;
};

nand: nand-controller@8d000 {
diff --git a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi
index 04fb130ac7c6..bbf94a1f92a1 100644
--- a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi
@@ -1960,6 +1960,9 @@ sd_emmc_b: mmc@5000 {
<&clkc CLKID_FCLK_DIV2>;
clock-names = "core", "clkin0", "clkin1";
resets = <&reset RESET_SD_EMMC_B>;
+
+ assigned-clocks = <&clkc CLKID_SD_EMMC_B_CLK0>;
+ assigned-clock-rates = <24000000>;
};

sd_emmc_c: mmc@7000 {
@@ -1972,6 +1975,9 @@ sd_emmc_c: mmc@7000 {
<&clkc CLKID_FCLK_DIV2>;
clock-names = "core", "clkin0", "clkin1";
resets = <&reset RESET_SD_EMMC_C>;
+
+ assigned-clocks = <&clkc CLKID_SD_EMMC_C_CLK0>;
+ assigned-clock-rates = <24000000>;
};

nfc: nand-controller@7800 {
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
index dcc927a9da80..8d8ab775404d 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
@@ -2431,6 +2431,9 @@ sd_emmc_a: mmc@ffe03000 {
<&clkc CLKID_FCLK_DIV2>;
clock-names = "core", "clkin0", "clkin1";
resets = <&reset RESET_SD_EMMC_A>;
+
+ assigned-clocks = <&clkc CLKID_SD_EMMC_A_CLK0>;
+ assigned-clock-rates = <24000000>;
};

sd_emmc_b: mmc@ffe05000 {
@@ -2443,6 +2446,9 @@ sd_emmc_b: mmc@ffe05000 {
<&clkc CLKID_FCLK_DIV2>;
clock-names = "core", "clkin0", "clkin1";
resets = <&reset RESET_SD_EMMC_B>;
+
+ assigned-clocks = <&clkc CLKID_SD_EMMC_B_CLK0>;
+ assigned-clock-rates = <24000000>;
};

sd_emmc_c: mmc@ffe07000 {
@@ -2455,6 +2461,9 @@ sd_emmc_c: mmc@ffe07000 {
<&clkc CLKID_FCLK_DIV2>;
clock-names = "core", "clkin0", "clkin1";
resets = <&reset RESET_SD_EMMC_C>;
+
+ assigned-clocks = <&clkc CLKID_SD_EMMC_C_CLK0>;
+ assigned-clock-rates = <24000000>;
};

usb: usb@ffe09000 {
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi
index f69923da07fe..a9c830a570cc 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi
@@ -824,6 +824,9 @@ &sd_emmc_a {
<&clkc CLKID_FCLK_DIV2>;
clock-names = "core", "clkin0", "clkin1";
resets = <&reset RESET_SD_EMMC_A>;
+
+ assigned-clocks = <&clkc CLKID_SD_EMMC_A_CLK0>;
+ assigned-clock-rates = <24000000>;
};

&sd_emmc_b {
@@ -832,6 +835,9 @@ &sd_emmc_b {
<&clkc CLKID_FCLK_DIV2>;
clock-names = "core", "clkin0", "clkin1";
resets = <&reset RESET_SD_EMMC_B>;
+
+ assigned-clocks = <&clkc CLKID_SD_EMMC_B_CLK0>;
+ assigned-clock-rates = <24000000>;
};

&sd_emmc_c {
@@ -840,6 +846,9 @@ &sd_emmc_c {
<&clkc CLKID_FCLK_DIV2>;
clock-names = "core", "clkin0", "clkin1";
resets = <&reset RESET_SD_EMMC_C>;
+
+ assigned-clocks = <&clkc CLKID_SD_EMMC_C_CLK0>;
+ assigned-clock-rates = <24000000>;
};

&simplefb_hdmi {
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
index ba535010a3c9..e202d84f0672 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
@@ -894,6 +894,9 @@ &sd_emmc_a {
<&clkc CLKID_FCLK_DIV2>;
clock-names = "core", "clkin0", "clkin1";
resets = <&reset RESET_SD_EMMC_A>;
+
+ assigned-clocks = <&clkc CLKID_SD_EMMC_A_CLK0>;
+ assigned-clock-rates = <24000000>;
};

&sd_emmc_b {
@@ -902,6 +905,9 @@ &sd_emmc_b {
<&clkc CLKID_FCLK_DIV2>;
clock-names = "core", "clkin0", "clkin1";
resets = <&reset RESET_SD_EMMC_B>;
+
+ assigned-clocks = <&clkc CLKID_SD_EMMC_B_CLK0>;
+ assigned-clock-rates = <24000000>;
};

&sd_emmc_c {
@@ -910,6 +916,9 @@ &sd_emmc_c {
<&clkc CLKID_FCLK_DIV2>;
clock-names = "core", "clkin0", "clkin1";
resets = <&reset RESET_SD_EMMC_C>;
+
+ assigned-clocks = <&clkc CLKID_SD_EMMC_C_CLK0>;
+ assigned-clock-rates = <24000000>;
};

&simplefb_hdmi {
diff --git a/arch/arm64/boot/dts/amlogic/meson-s4.dtsi b/arch/arm64/boot/dts/amlogic/meson-s4.dtsi
index 9d99ed2994df..dfc0a30a6e61 100644
--- a/arch/arm64/boot/dts/amlogic/meson-s4.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-s4.dtsi
@@ -819,13 +819,16 @@ sdio: mmc@fe088000 {
reg = <0x0 0xfe088000 0x0 0x800>;
interrupts = <GIC_SPI 176 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clkc_periphs CLKID_SDEMMC_A>,
- <&xtal>,
+ <&clkc_periphs CLKID_SD_EMMC_A>,
<&clkc_pll CLKID_FCLK_DIV2>;
clock-names = "core", "clkin0", "clkin1";
resets = <&reset RESET_SD_EMMC_A>;
cap-sdio-irq;
keep-power-in-suspend;
status = "disabled";
+
+ assigned-clocks = <&clkc_periphs CLKID_SD_EMMC_A>;
+ assigned-clock-rates = <24000000>;
};

sd: mmc@fe08a000 {
@@ -838,6 +841,9 @@ sd: mmc@fe08a000 {
clock-names = "core", "clkin0", "clkin1";
resets = <&reset RESET_SD_EMMC_B>;
status = "disabled";
+
+ assigned-clocks = <&clkc_periphs CLKID_SD_EMMC_B>;
+ assigned-clock-rates = <24000000>;
};

emmc: mmc@fe08c000 {
@@ -845,13 +851,16 @@ emmc: mmc@fe08c000 {
reg = <0x0 0xfe08c000 0x0 0x800>;
interrupts = <GIC_SPI 178 IRQ_TYPE_EDGE_RISING>;
clocks = <&clkc_periphs CLKID_NAND>,
- <&xtal>,
+ <&clkc_periphs CLKID_SD_EMMC_C>,
<&clkc_pll CLKID_FCLK_DIV2>;
clock-names = "core", "clkin0", "clkin1";
resets = <&reset RESET_NAND_EMMC>;
no-sdio;
no-sd;
status = "disabled";
+
+ assigned-clocks = <&clkc_periphs CLKID_SD_EMMC_C>;
+ assigned-clock-rates = <24000000>;
};
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mp-ras314.dts b/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mp-ras314.dts
index f7346b3d35fe..a122f2ed5f53 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mp-ras314.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mp-ras314.dts
@@ -704,7 +704,7 @@ pinctrl_hdmi: hdmigrp {
fsl,pins = <MX8MP_IOMUXC_HDMI_DDC_SCL__HDMIMIX_HDMI_SCL 0x400001c2>,
<MX8MP_IOMUXC_HDMI_DDC_SDA__HDMIMIX_HDMI_SDA 0x400001c2>,
<MX8MP_IOMUXC_HDMI_HPD__HDMIMIX_HDMI_HPD 0x40000010>,
- <MX8MP_IOMUXC_HDMI_CEC__HDMIMIX_HDMI_CEC 0x40000154>;
+ <MX8MP_IOMUXC_HDMI_CEC__HDMIMIX_HDMI_CEC 0x40000030>;
};

pinctrl_gpt1: gpt1grp {
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts b/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts
index 4eedd00d83b9..ac05c05193c5 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts
@@ -860,7 +860,7 @@ pinctrl_hdmi: hdmigrp {
fsl,pins = <MX8MP_IOMUXC_HDMI_DDC_SCL__HDMIMIX_HDMI_SCL 0x400001c2>,
<MX8MP_IOMUXC_HDMI_DDC_SDA__HDMIMIX_HDMI_SDA 0x400001c2>,
<MX8MP_IOMUXC_HDMI_HPD__HDMIMIX_HDMI_HPD 0x40000010>,
- <MX8MP_IOMUXC_HDMI_CEC__HDMIMIX_HDMI_CEC 0x40000010>;
+ <MX8MP_IOMUXC_HDMI_CEC__HDMIMIX_HDMI_CEC 0x40000030>;
};

pinctrl_hoggpio2: hoggpio2grp {
diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi-pico6.dts b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi-pico6.dts
index cce326aec1aa..40af5656d6f1 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi-pico6.dts
+++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi-pico6.dts
@@ -91,7 +91,7 @@ bluetooth@2 {

&pio {
bt_pins_wakeup: bt-pins-wakeup {
- piins-bt-wakeup {
+ pins-bt-wakeup {
pinmux = <PINMUX_GPIO42__FUNC_GPIO42>;
input-enable;
};
diff --git a/arch/arm64/boot/dts/qcom/msm8994-msft-lumia-octagon.dtsi b/arch/arm64/boot/dts/qcom/msm8994-msft-lumia-octagon.dtsi
index 4c983b10dd92..7ace3540ef0a 100644
--- a/arch/arm64/boot/dts/qcom/msm8994-msft-lumia-octagon.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8994-msft-lumia-octagon.dtsi
@@ -378,7 +378,7 @@ &blsp2_i2c1 {
status = "okay";

sideinteraction: touch@2c {
- compatible = "ad,ad7147_captouch";
+ compatible = "adi,ad7147_captouch";
reg = <0x2c>;

pinctrl-names = "default", "sleep";
diff --git a/arch/arm64/boot/dts/qcom/qcm2290.dtsi b/arch/arm64/boot/dts/qcom/qcm2290.dtsi
index 3b0ba590ee82..e0e400fdd249 100644
--- a/arch/arm64/boot/dts/qcom/qcm2290.dtsi
+++ b/arch/arm64/boot/dts/qcom/qcm2290.dtsi
@@ -1503,8 +1503,12 @@ usb_dwc3_ss: endpoint {

gpu: gpu@5900000 {
compatible = "qcom,adreno-07000200", "qcom,adreno";
- reg = <0x0 0x05900000 0x0 0x40000>;
- reg-names = "kgsl_3d0_reg_memory";
+ reg = <0x0 0x05900000 0x0 0x40000>,
+ <0x0 0x0599e000 0x0 0x1000>,
+ <0x0 0x05961000 0x0 0x800>;
+ reg-names = "kgsl_3d0_reg_memory",
+ "cx_mem",
+ "cx_dbgc";

interrupts = <GIC_SPI 177 IRQ_TYPE_LEVEL_HIGH>;

diff --git a/arch/arm64/boot/dts/qcom/qrb4210-rb2.dts b/arch/arm64/boot/dts/qcom/qrb4210-rb2.dts
index bdf2d66e40c6..44ca3e61c33d 100644
--- a/arch/arm64/boot/dts/qcom/qrb4210-rb2.dts
+++ b/arch/arm64/boot/dts/qcom/qrb4210-rb2.dts
@@ -694,7 +694,7 @@ sdc2_card_det_n: sd-card-det-n-state {

&uart3 {
interrupts-extended = <&intc GIC_SPI 330 IRQ_TYPE_LEVEL_HIGH>,
- <&tlmm 11 IRQ_TYPE_LEVEL_HIGH>;
+ <&tlmm 11 IRQ_TYPE_EDGE_FALLING>;
pinctrl-0 = <&uart3_default>;
pinctrl-1 = <&uart3_sleep>;
pinctrl-names = "default", "sleep";
diff --git a/arch/arm64/boot/dts/qcom/sdm630.dtsi b/arch/arm64/boot/dts/qcom/sdm630.dtsi
index 8b1a45a4e56e..b383e480a394 100644
--- a/arch/arm64/boot/dts/qcom/sdm630.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm630.dtsi
@@ -598,8 +598,8 @@ qusb2_hstx_trim: hstx-trim@240 {
};

gpu_speed_bin: gpu-speed-bin@41a0 {
- reg = <0x41a2 0x1>;
- bits = <5 7>;
+ reg = <0x41a2 0x2>;
+ bits = <5 8>;
};
};

diff --git a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts
index 8abf3e909502..5147d6d3cc26 100644
--- a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts
+++ b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts
@@ -379,6 +379,12 @@ vreg_l21a_2p95: ldo21 {
regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
};

+ vreg_l23a_3p3: ldo23 {
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3312000>;
+ regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+ };
+
vreg_l24a_3p075: ldo24 {
regulator-min-microvolt = <3088000>;
regulator-max-microvolt = <3088000>;
@@ -850,7 +856,6 @@ &spi0 {
status = "okay";
pinctrl-names = "default";
pinctrl-0 = <&qup_spi0_default>;
- cs-gpios = <&tlmm 3 GPIO_ACTIVE_LOW>;

can@0 {
compatible = "microchip,mcp2517fd";
@@ -1156,6 +1161,7 @@ &wifi {
vdd-1.8-xo-supply = <&vreg_l7a_1p8>;
vdd-1.3-rfa-supply = <&vreg_l17a_1p3>;
vdd-3.3-ch0-supply = <&vreg_l25a_3p3>;
+ vdd-3.3-ch1-supply = <&vreg_l23a_3p3>;

qcom,snoc-host-cap-8bit-quirk;
qcom,calibration-variant = "Thundercomm_DB845C";
diff --git a/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi b/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi
index 51a9a276399a..1036305231b2 100644
--- a/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi
@@ -160,7 +160,6 @@ ts_1p8_supply: ts-1p8-regulator {

gpio = <&tlmm 88 0>;
enable-active-high;
- regulator-boot-on;
};
};

@@ -257,7 +256,7 @@ vreg_l14a_1p88: ldo14 {
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
- regulator-always-on;
+ regulator-boot-on;
};

vreg_l17a_1p3: ldo17 {
diff --git a/arch/arm64/boot/dts/qcom/sm6115.dtsi b/arch/arm64/boot/dts/qcom/sm6115.dtsi
index 91fc36b59abf..8b8395f6a2df 100644
--- a/arch/arm64/boot/dts/qcom/sm6115.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm6115.dtsi
@@ -1715,8 +1715,12 @@ usb_dwc3_ss: endpoint {

gpu: gpu@5900000 {
compatible = "qcom,adreno-610.0", "qcom,adreno";
- reg = <0x0 0x05900000 0x0 0x40000>;
- reg-names = "kgsl_3d0_reg_memory";
+ reg = <0x0 0x05900000 0x0 0x40000>,
+ <0x0 0x0599e000 0x0 0x1000>,
+ <0x0 0x05961000 0x0 0x800>;
+ reg-names = "kgsl_3d0_reg_memory",
+ "cx_mem",
+ "cx_dbgc";

/* There's no (real) GMU, so we have to handle quite a bunch of clocks! */
clocks = <&gpucc GPU_CC_GX_GFX3D_CLK>,
diff --git a/arch/arm64/boot/dts/qcom/sm6150.dtsi b/arch/arm64/boot/dts/qcom/sm6150.dtsi
index 64e7c9dbafc7..363b9f436cd0 100644
--- a/arch/arm64/boot/dts/qcom/sm6150.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm6150.dtsi
@@ -398,7 +398,6 @@ cdsp_smp2p_in: slave-kernel {

qup_opp_table: opp-table-qup {
compatible = "operating-points-v2";
- opp-shared;

opp-75000000 {
opp-hz = /bits/ 64 <75000000>;
diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi
index 662ad694cd91..512a75da4f13 100644
--- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi
+++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi
@@ -791,8 +791,8 @@ soc: soc@0 {

#address-cells = <2>;
#size-cells = <2>;
- dma-ranges = <0 0 0 0 0x10 0>;
- ranges = <0 0 0 0 0x10 0>;
+ dma-ranges = <0 0 0 0 0x100 0>;
+ ranges = <0 0 0 0 0x100 0>;

gcc: clock-controller@100000 {
compatible = "qcom,x1e80100-gcc";
@@ -2910,7 +2910,7 @@ usb_1_ss1_qmpphy: phy@fda000 {
reg = <0 0x00fda000 0 0x4000>;

clocks = <&gcc GCC_USB3_SEC_PHY_AUX_CLK>,
- <&rpmhcc RPMH_CXO_CLK>,
+ <&tcsr TCSR_USB4_1_CLKREF_EN>,
<&gcc GCC_USB3_SEC_PHY_COM_AUX_CLK>,
<&gcc GCC_USB3_SEC_PHY_PIPE_CLK>;
clock-names = "aux",
@@ -2981,7 +2981,7 @@ usb_1_ss2_qmpphy: phy@fdf000 {
reg = <0 0x00fdf000 0 0x4000>;

clocks = <&gcc GCC_USB3_TERT_PHY_AUX_CLK>,
- <&rpmhcc RPMH_CXO_CLK>,
+ <&tcsr TCSR_USB4_2_CLKREF_EN>,
<&gcc GCC_USB3_TERT_PHY_COM_AUX_CLK>,
<&gcc GCC_USB3_TERT_PHY_PIPE_CLK>;
clock-names = "aux",
diff --git a/arch/arm64/boot/dts/renesas/r9a09g047e57-smarc.dts b/arch/arm64/boot/dts/renesas/r9a09g047e57-smarc.dts
index 08e814c03fa8..ed6fcdc337a0 100644
--- a/arch/arm64/boot/dts/renesas/r9a09g047e57-smarc.dts
+++ b/arch/arm64/boot/dts/renesas/r9a09g047e57-smarc.dts
@@ -8,7 +8,6 @@
/dts-v1/;

/* Switch selection settings */
-#define SW_LCD_EN 0
#define SW_GPIO8_CAN0_STB 0
#define SW_GPIO9_CAN1_STB 0
#define SW_LCD_EN 0
diff --git a/arch/arm64/boot/dts/renesas/rzt2h-n2h-evk-common.dtsi b/arch/arm64/boot/dts/renesas/rzt2h-n2h-evk-common.dtsi
index 5c91002c99c4..5384a43837c1 100644
--- a/arch/arm64/boot/dts/renesas/rzt2h-n2h-evk-common.dtsi
+++ b/arch/arm64/boot/dts/renesas/rzt2h-n2h-evk-common.dtsi
@@ -154,8 +154,7 @@ data-pins {
ctrl-pins {
pinmux = <RZT2H_PORT_PINMUX(12, 0, 0x29)>, /* SD0_CLK */
<RZT2H_PORT_PINMUX(12, 1, 0x29)>, /* SD0_CMD */
- <RZT2H_PORT_PINMUX(22, 5, 0x29)>, /* SD0_CD */
- <RZT2H_PORT_PINMUX(22, 6, 0x29)>; /* SD0_WP */
+ <RZT2H_PORT_PINMUX(22, 5, 0x29)>; /* SD0_CD */
};
};

@@ -212,6 +211,7 @@ &sdhi0 {
pinctrl-names = "default", "state_uhs";
vmmc-supply = <&reg_3p3v>;
vqmmc-supply = <&vqmmc_sdhi0>;
+ wp-gpios = <&pinctrl RZT2H_GPIO(22, 6) GPIO_ACTIVE_HIGH>;
bus-width = <4>;
sd-uhs-sdr50;
sd-uhs-sdr104;
diff --git a/arch/arm64/boot/dts/ti/k3-j784s4-j742s2-main-common.dtsi b/arch/arm64/boot/dts/ti/k3-j784s4-j742s2-main-common.dtsi
index 9cc0901d58fb..c2636e624f18 100644
--- a/arch/arm64/boot/dts/ti/k3-j784s4-j742s2-main-common.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j784s4-j742s2-main-common.dtsi
@@ -2378,42 +2378,6 @@ watchdog3: watchdog@2230000 {
assigned-clock-parents = <&k3_clks 351 4>;
};

- watchdog4: watchdog@2240000 {
- compatible = "ti,j7-rti-wdt";
- reg = <0x00 0x2240000 0x00 0x100>;
- clocks = <&k3_clks 352 0>;
- power-domains = <&k3_pds 352 TI_SCI_PD_EXCLUSIVE>;
- assigned-clocks = <&k3_clks 352 0>;
- assigned-clock-parents = <&k3_clks 352 4>;
- };
-
- watchdog5: watchdog@2250000 {
- compatible = "ti,j7-rti-wdt";
- reg = <0x00 0x2250000 0x00 0x100>;
- clocks = <&k3_clks 353 0>;
- power-domains = <&k3_pds 353 TI_SCI_PD_EXCLUSIVE>;
- assigned-clocks = <&k3_clks 353 0>;
- assigned-clock-parents = <&k3_clks 353 4>;
- };
-
- watchdog6: watchdog@2260000 {
- compatible = "ti,j7-rti-wdt";
- reg = <0x00 0x2260000 0x00 0x100>;
- clocks = <&k3_clks 354 0>;
- power-domains = <&k3_pds 354 TI_SCI_PD_EXCLUSIVE>;
- assigned-clocks = <&k3_clks 354 0>;
- assigned-clock-parents = <&k3_clks 354 4>;
- };
-
- watchdog7: watchdog@2270000 {
- compatible = "ti,j7-rti-wdt";
- reg = <0x00 0x2270000 0x00 0x100>;
- clocks = <&k3_clks 355 0>;
- power-domains = <&k3_pds 355 TI_SCI_PD_EXCLUSIVE>;
- assigned-clocks = <&k3_clks 355 0>;
- assigned-clock-parents = <&k3_clks 355 4>;
- };
-
/*
* The following RTI instances are coupled with MCU R5Fs, c7x and
* GPU so keeping them reserved as these will be used by their
diff --git a/arch/arm64/boot/dts/ti/k3-j784s4-main.dtsi b/arch/arm64/boot/dts/ti/k3-j784s4-main.dtsi
index 0160fe0da983..78fcd0c40abc 100644
--- a/arch/arm64/boot/dts/ti/k3-j784s4-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j784s4-main.dtsi
@@ -6,17 +6,40 @@
*/

&cbass_main {
- c71_3: dsp@67800000 {
- compatible = "ti,j721s2-c71-dsp";
- reg = <0x00 0x67800000 0x00 0x00080000>,
- <0x00 0x67e00000 0x00 0x0000c000>;
- reg-names = "l2sram", "l1dram";
- resets = <&k3_reset 40 1>;
- firmware-name = "j784s4-c71_3-fw";
- ti,sci = <&sms>;
- ti,sci-dev-id = <40>;
- ti,sci-proc-ids = <0x33 0xff>;
- status = "disabled";
+ watchdog4: watchdog@2240000 {
+ compatible = "ti,j7-rti-wdt";
+ reg = <0x00 0x2240000 0x00 0x100>;
+ clocks = <&k3_clks 352 0>;
+ power-domains = <&k3_pds 352 TI_SCI_PD_EXCLUSIVE>;
+ assigned-clocks = <&k3_clks 352 0>;
+ assigned-clock-parents = <&k3_clks 352 4>;
+ };
+
+ watchdog5: watchdog@2250000 {
+ compatible = "ti,j7-rti-wdt";
+ reg = <0x00 0x2250000 0x00 0x100>;
+ clocks = <&k3_clks 353 0>;
+ power-domains = <&k3_pds 353 TI_SCI_PD_EXCLUSIVE>;
+ assigned-clocks = <&k3_clks 353 0>;
+ assigned-clock-parents = <&k3_clks 353 4>;
+ };
+
+ watchdog6: watchdog@2260000 {
+ compatible = "ti,j7-rti-wdt";
+ reg = <0x00 0x2260000 0x00 0x100>;
+ clocks = <&k3_clks 354 0>;
+ power-domains = <&k3_pds 354 TI_SCI_PD_EXCLUSIVE>;
+ assigned-clocks = <&k3_clks 354 0>;
+ assigned-clock-parents = <&k3_clks 354 4>;
+ };
+
+ watchdog7: watchdog@2270000 {
+ compatible = "ti,j7-rti-wdt";
+ reg = <0x00 0x2270000 0x00 0x100>;
+ clocks = <&k3_clks 355 0>;
+ power-domains = <&k3_pds 355 TI_SCI_PD_EXCLUSIVE>;
+ assigned-clocks = <&k3_clks 355 0>;
+ assigned-clock-parents = <&k3_clks 355 4>;
};

pcie2_rc: pcie@2920000 {
@@ -113,6 +136,19 @@ serdes2: serdes@5020000 {
status = "disabled";
};
};
+
+ c71_3: dsp@67800000 {
+ compatible = "ti,j721s2-c71-dsp";
+ reg = <0x00 0x67800000 0x00 0x00080000>,
+ <0x00 0x67e00000 0x00 0x0000c000>;
+ reg-names = "l2sram", "l1dram";
+ resets = <&k3_reset 40 1>;
+ firmware-name = "j784s4-c71_3-fw";
+ ti,sci = <&sms>;
+ ti,sci-dev-id = <40>;
+ ti,sci-proc-ids = <0x33 0xff>;
+ status = "disabled";
+ };
};

&scm_conf {
diff --git a/arch/arm64/mm/gcs.c b/arch/arm64/mm/gcs.c
index 6e93f78de79b..04a23a497f20 100644
--- a/arch/arm64/mm/gcs.c
+++ b/arch/arm64/mm/gcs.c
@@ -199,8 +199,8 @@ int arch_set_shadow_stack_status(struct task_struct *task, unsigned long arg)

size = gcs_size(0);
gcs = alloc_gcs(0, size);
- if (!gcs)
- return -ENOMEM;
+ if (IS_ERR_VALUE(gcs))
+ return gcs;

task->thread.gcspr_el0 = gcs + size - sizeof(u64);
task->thread.gcs_base = gcs;
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 0dfefeedfe56..83a6ca613f9c 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -2939,7 +2939,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
u64 plt_target = 0ULL;
bool poking_bpf_entry;

- if (!__bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf))
+ if (!bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf))
/* Only poking bpf text is supported. Since kernel function
* entry is set up by ftrace, we reply on ftrace to poke kernel
* functions.
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index 87ff02513787..e9d666508ae2 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -1318,7 +1318,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
/* Only poking bpf text is supported. Since kernel function entry
* is set up by ftrace, we rely on ftrace to poke kernel functions.
*/
- if (!__bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf))
+ if (!bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf))
return -ENOTSUPP;

image = ip - offset;
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index e8683f58fd3e..83a6b68d8a39 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1408,7 +1408,6 @@ config CPU_LOONGSON32
select CPU_MIPS32
select CPU_MIPSR2
select CPU_HAS_PREFETCH
- select CPU_HAS_LOAD_STORE_LR
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
select CPU_SUPPORTS_CPUFREQ
diff --git a/arch/mips/kernel/relocate.c b/arch/mips/kernel/relocate.c
index 7f1c136ad850..59833210542f 100644
--- a/arch/mips/kernel/relocate.c
+++ b/arch/mips/kernel/relocate.c
@@ -420,7 +420,20 @@ void *__init relocate_kernel(void)
goto out;

/* The current thread is now within the relocated image */
+#ifndef CONFIG_CC_IS_CLANG
__current_thread_info = RELOCATED(&init_thread_union);
+#else
+ /*
+ * LLVM may wrongly restore $gp ($28) in epilog even if it's
+ * intentionally modified. Work around this by using inline
+ * assembly to assign $gp. $gp couldn't be listed as output or
+ * clobber, or LLVM will still restore its original value.
+ * See also LLVM upstream issue
+ * https://github.com/llvm/llvm-project/issues/176546
+ */
+ asm volatile("move $28, %0" : :
+ "r" (RELOCATED(&init_thread_union)));
+#endif

/* Return the new kernel's entry point */
kernel_entry = RELOCATED(start_kernel);
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 5e34611de9ef..b7ebb4ac2c71 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -289,6 +289,8 @@ void eeh_pe_dev_traverse(struct eeh_pe *root,
void eeh_pe_restore_bars(struct eeh_pe *pe);
const char *eeh_pe_loc_get(struct eeh_pe *pe);
struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);
+const char *eeh_pe_loc_get_bus(struct pci_bus *bus);
+struct pci_bus *eeh_pe_bus_get_nolock(struct eeh_pe *pe);

void eeh_show_enabled(void);
int __init eeh_init(struct eeh_ops *ops);
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index dab63b82a8d4..f2009d7c8cfa 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -134,7 +134,6 @@ static __always_inline void kuap_assert_locked(void)

static __always_inline void allow_read_from_user(const void __user *from, unsigned long size)
{
- barrier_nospec();
allow_user_access(NULL, from, size, KUAP_READ);
}

@@ -146,7 +145,6 @@ static __always_inline void allow_write_to_user(void __user *to, unsigned long s
static __always_inline void allow_read_write_user(void __user *to, const void __user *from,
unsigned long size)
{
- barrier_nospec();
allow_user_access(to, from, size, KUAP_READ_WRITE);
}

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 4f5a46a77fa2..3987a5c33558 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -301,6 +301,7 @@ do { \
__typeof__(sizeof(*(ptr))) __gu_size = sizeof(*(ptr)); \
\
might_fault(); \
+ barrier_nospec(); \
allow_read_from_user(__gu_addr, __gu_size); \
__get_user_size_allowed(__gu_val, __gu_addr, __gu_size, __gu_err); \
prevent_read_from_user(__gu_addr, __gu_size); \
@@ -329,6 +330,7 @@ raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
{
unsigned long ret;

+ barrier_nospec();
allow_read_write_user(to, from, n);
ret = __copy_tofrom_user(to, from, n);
prevent_read_write_user(to, from, n);
@@ -415,6 +417,7 @@ static __must_check __always_inline bool user_access_begin(const void __user *pt

might_fault();

+ barrier_nospec();
allow_read_write_user((void __user *)ptr, ptr, len);
return true;
}
@@ -431,6 +434,7 @@ user_read_access_begin(const void __user *ptr, size_t len)

might_fault();

+ barrier_nospec();
allow_read_from_user(ptr, len);
return true;
}
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index ef78ff77cf8f..028f69158532 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -846,7 +846,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)

pci_lock_rescan_remove();

- bus = eeh_pe_bus_get(pe);
+ bus = eeh_pe_bus_get_nolock(pe);
if (!bus) {
pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
__func__, pe->phb->global_number, pe->addr);
@@ -886,14 +886,15 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
/* Log the event */
if (pe->type & EEH_PE_PHB) {
pr_err("EEH: Recovering PHB#%x, location: %s\n",
- pe->phb->global_number, eeh_pe_loc_get(pe));
+ pe->phb->global_number, eeh_pe_loc_get_bus(bus));
} else {
struct eeh_pe *phb_pe = eeh_phb_pe_get(pe->phb);

pr_err("EEH: Recovering PHB#%x-PE#%x\n",
pe->phb->global_number, pe->addr);
pr_err("EEH: PE location: %s, PHB location: %s\n",
- eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
+ eeh_pe_loc_get_bus(bus),
+ eeh_pe_loc_get_bus(eeh_pe_bus_get_nolock(phb_pe)));
}

#ifdef CONFIG_STACKTRACE
@@ -1098,7 +1099,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);

- bus = eeh_pe_bus_get(pe);
+ bus = eeh_pe_bus_get_nolock(pe);
if (bus)
pci_hp_remove_devices(bus);
else
@@ -1222,7 +1223,7 @@ void eeh_handle_special_event(void)
(phb_pe->state & EEH_PE_RECOVERING))
continue;

- bus = eeh_pe_bus_get(phb_pe);
+ bus = eeh_pe_bus_get_nolock(phb_pe);
if (!bus) {
pr_err("%s: Cannot find PCI bus for "
"PHB#%x-PE#%x\n",
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index e740101fadf3..040e8f69a4aa 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -812,6 +812,24 @@ void eeh_pe_restore_bars(struct eeh_pe *pe)
const char *eeh_pe_loc_get(struct eeh_pe *pe)
{
struct pci_bus *bus = eeh_pe_bus_get(pe);
+ return eeh_pe_loc_get_bus(bus);
+}
+
+/**
+ * eeh_pe_loc_get_bus - Retrieve location code binding to the given PCI bus
+ * @bus: PCI bus
+ *
+ * Retrieve the location code associated with the given PCI bus. If the bus
+ * is a root bus, the location code is fetched from the PHB device tree node
+ * or root port. Otherwise, the location code is obtained from the device
+ * tree node of the upstream bridge of the bus. The function walks up the
+ * bus hierarchy if necessary, checking each node for the appropriate
+ * location code property ("ibm,io-base-loc-code" for root buses,
+ * "ibm,slot-location-code" for others). If no location code is found,
+ * returns "N/A".
+ */
+const char *eeh_pe_loc_get_bus(struct pci_bus *bus)
+{
struct device_node *dn;
const char *loc = NULL;

@@ -838,8 +856,9 @@ const char *eeh_pe_loc_get(struct eeh_pe *pe)
}

/**
- * eeh_pe_bus_get - Retrieve PCI bus according to the given PE
+ * _eeh_pe_bus_get - Retrieve PCI bus according to the given PE
* @pe: EEH PE
+ * @do_lock: Whether this function should take pci_lock_rescan_remove itself
*
* Retrieve the PCI bus according to the given PE. Basically,
* there're 3 types of PEs: PHB/Bus/Device. For PHB PE, the
@@ -847,7 +866,7 @@ const char *eeh_pe_loc_get(struct eeh_pe *pe)
* returned for BUS PE. However, we don't have associated PCI
* bus for DEVICE PE.
*/
-struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
+static struct pci_bus *_eeh_pe_bus_get(struct eeh_pe *pe, bool do_lock)
{
struct eeh_dev *edev;
struct pci_dev *pdev;
@@ -862,11 +881,58 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)

/* Retrieve the parent PCI bus of first (top) PCI device */
edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry);
- pci_lock_rescan_remove();
+ if (do_lock)
+ pci_lock_rescan_remove();
pdev = eeh_dev_to_pci_dev(edev);
if (pdev)
bus = pdev->bus;
- pci_unlock_rescan_remove();
+ if (do_lock)
+ pci_unlock_rescan_remove();

return bus;
}
+
+/**
+ * eeh_pe_bus_get - Retrieve PCI bus associated with the given EEH PE, locking
+ * if needed
+ * @pe: Pointer to the EEH PE
+ *
+ * This function is a wrapper around _eeh_pe_bus_get(), which retrieves the PCI
+ * bus associated with the provided EEH PE structure. It acquires the PCI
+ * rescan/remove lock to ensure safe access to shared data during the retrieval
+ * process. This function should be used when the caller requires the PCI bus
+ * while holding the rescan/remove lock, typically during operations that modify
+ * or inspect PCIe device state in a safe manner.
+ *
+ * RETURNS:
+ * A pointer to the PCI bus associated with the EEH PE, or NULL if none found.
+ */
+
+struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
+{
+ return _eeh_pe_bus_get(pe, true);
+}
+
+/**
+ * eeh_pe_bus_get_nolock - Retrieve PCI bus associated with the given EEH PE
+ * without locking
+ * @pe: Pointer to the EEH PE
+ *
+ * This function is a variant of _eeh_pe_bus_get() that retrieves the PCI bus
+ * associated with the specified EEH PE without acquiring the
+ * pci_lock_rescan_remove lock. It should only be used when the caller can
+ * guarantee safe access to PE structures without the need for that lock,
+ * typically in contexts where the lock is already held or locking is otherwise
+ * managed.
+ *
+ * RETURNS:
+ * pointer to the PCI bus associated with the EEH PE, or NULL if none is found.
+ *
+ * NOTE:
+ * Use this function carefully to avoid race conditions and data corruption.
+ */
+
+struct pci_bus *eeh_pe_bus_get_nolock(struct eeh_pe *pe)
+{
+ return _eeh_pe_bus_get(pe, false);
+}
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 88ad5ba7b87f..21f7f26a5e2f 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -1122,7 +1122,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
branch_flags = poke_type == BPF_MOD_CALL ? BRANCH_SET_LINK : 0;

/* We currently only support poking bpf programs */
- if (!__bpf_address_lookup(bpf_func, &size, &offset, name)) {
+ if (!bpf_address_lookup(bpf_func, &size, &offset, name)) {
pr_err("%s (0x%lx): kernel/modules are not supported\n", __func__, bpf_func);
return -EOPNOTSUPP;
}
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index df22b10d9141..e60d2b823e09 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -270,6 +270,7 @@ config S390
select SPARSE_IRQ
select SWIOTLB
select SYSCTL_EXCEPTION_TRACE
+ select SYSTEM_DATA_VERIFICATION if KEXEC_SIG
select THREAD_INFO_IN_TASK
select TRACE_IRQFLAGS_SUPPORT
select TTY
@@ -296,7 +297,7 @@ config ARCH_SUPPORTS_KEXEC_FILE
def_bool y

config ARCH_SUPPORTS_KEXEC_SIG
- def_bool MODULE_SIG_FORMAT
+ def_bool y

config ARCH_SUPPORTS_KEXEC_PURGATORY
def_bool y
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index dd9ff120ad43..56df4855f38e 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -3101,8 +3101,8 @@ void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
cap->version = x86_pmu.version;
cap->num_counters_gp = x86_pmu_num_counters(NULL);
cap->num_counters_fixed = x86_pmu_num_counters_fixed(NULL);
- cap->bit_width_gp = x86_pmu.cntval_bits;
- cap->bit_width_fixed = x86_pmu.cntval_bits;
+ cap->bit_width_gp = cap->num_counters_gp ? x86_pmu.cntval_bits : 0;
+ cap->bit_width_fixed = cap->num_counters_fixed ? x86_pmu.cntval_bits : 0;
cap->events_mask = (unsigned int)x86_pmu.events_maskl;
cap->events_mask_len = x86_pmu.events_mask_len;
cap->pebs_ept = x86_pmu.pebs_ept;
diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c
index 042e8712d8de..8aafccf7a52c 100644
--- a/arch/x86/hyperv/hv_vtl.c
+++ b/arch/x86/hyperv/hv_vtl.c
@@ -105,7 +105,7 @@ static void hv_vtl_ap_entry(void)

static int hv_vtl_bringup_vcpu(u32 target_vp_index, int cpu, u64 eip_ignored)
{
- u64 status;
+ u64 status, rsp, rip;
int ret = 0;
struct hv_enable_vp_vtl *input;
unsigned long irq_flags;
@@ -118,9 +118,11 @@ static int hv_vtl_bringup_vcpu(u32 target_vp_index, int cpu, u64 eip_ignored)
struct desc_struct *gdt;

struct task_struct *idle = idle_thread_get(cpu);
- u64 rsp = (unsigned long)idle->thread.sp;
+ if (IS_ERR(idle))
+ return PTR_ERR(idle);

- u64 rip = (u64)&hv_vtl_ap_entry;
+ rsp = (unsigned long)idle->thread.sp;
+ rip = (u64)&hv_vtl_ap_entry;

native_store_gdt(&gdt_ptr);
store_idt(&idt_ptr);
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index b08c95872eed..c56e1e63b893 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -57,7 +57,7 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs)
}

#define arch_ftrace_partial_regs(regs) do { \
- regs->flags &= ~X86_EFLAGS_FIXED; \
+ regs->flags |= X86_EFLAGS_FIXED; \
regs->cs = __KERNEL_CS; \
} while (0)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 5d46709c58d0..a92750f3079a 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -951,26 +951,14 @@ static void init_amd_zen1(struct cpuinfo_x86 *c)
}
}

-static bool cpu_has_zenbleed_microcode(void)
-{
- u32 good_rev = 0;
-
- switch (boot_cpu_data.x86_model) {
- case 0x30 ... 0x3f: good_rev = 0x0830107b; break;
- case 0x60 ... 0x67: good_rev = 0x0860010c; break;
- case 0x68 ... 0x6f: good_rev = 0x08608107; break;
- case 0x70 ... 0x7f: good_rev = 0x08701033; break;
- case 0xa0 ... 0xaf: good_rev = 0x08a00009; break;
-
- default:
- return false;
- }
-
- if (boot_cpu_data.microcode < good_rev)
- return false;
-
- return true;
-}
+static const struct x86_cpu_id amd_zenbleed_microcode[] = {
+ ZEN_MODEL_STEP_UCODE(0x17, 0x31, 0x0, 0x0830107b),
+ ZEN_MODEL_STEP_UCODE(0x17, 0x60, 0x1, 0x0860010c),
+ ZEN_MODEL_STEP_UCODE(0x17, 0x68, 0x1, 0x08608107),
+ ZEN_MODEL_STEP_UCODE(0x17, 0x71, 0x0, 0x08701033),
+ ZEN_MODEL_STEP_UCODE(0x17, 0xa0, 0x0, 0x08a00009),
+ {}
+};

static void zen2_zenbleed_check(struct cpuinfo_x86 *c)
{
@@ -980,7 +968,7 @@ static void zen2_zenbleed_check(struct cpuinfo_x86 *c)
if (!cpu_has(c, X86_FEATURE_AVX))
return;

- if (!cpu_has_zenbleed_microcode()) {
+ if (!x86_match_min_microcode_rev(amd_zenbleed_microcode)) {
pr_notice_once("Zenbleed: please update your microcode for the most optimal fix\n");
msr_set_bit(MSR_AMD64_DE_CFG, MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT);
} else {
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 823dbdd0eb41..ae94f7b0fdf1 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -354,6 +354,9 @@ SYM_CODE_START(return_to_handler)
UNWIND_HINT_UNDEFINED
ANNOTATE_NOENDBR

+ /* Store original rsp for pt_regs.sp value. */
+ movq %rsp, %rdi
+
/* Restore return_to_handler value that got eaten by previous ret instruction. */
subq $8, %rsp
UNWIND_HINT_FUNC
@@ -364,7 +367,7 @@ SYM_CODE_START(return_to_handler)
movq %rax, RAX(%rsp)
movq %rdx, RDX(%rsp)
movq %rbp, RBP(%rsp)
- movq %rsp, RSP(%rsp)
+ movq %rdi, RSP(%rsp)
movq %rsp, %rdi

call ftrace_return_to_handler
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 53282dc7d5ac..23b91bf9b663 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -470,7 +470,7 @@ int __init arch_xen_unpopulated_init(struct resource **res)
* driver to know how much of the physmap is unpopulated and
* set an accurate initial memory target.
*/
- xen_released_pages += xen_extra_mem[i].n_pfns;
+ xen_unpopulated_pages += xen_extra_mem[i].n_pfns;
/* Zero so region is not also added to the balloon driver. */
xen_extra_mem[i].n_pfns = 0;
}
diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index 75246c481fa5..4610f491f088 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -294,7 +294,7 @@ aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size)
ret = -EINVAL;
goto out;
}
- amdxdna_cmd_set_state(cmd_abo, fail_cmd_status);
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR);

if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) {
struct amdxdna_cmd_chain *cc = amdxdna_cmd_get_payload(cmd_abo, NULL);
@@ -317,6 +317,9 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
struct dma_fence *fence;
int ret;

+ if (hwctx->status != HWCTX_STAT_READY)
+ return NULL;
+
if (!mmget_not_zero(job->mm))
return ERR_PTR(-ESRCH);

@@ -684,7 +687,10 @@ void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
aie2_hwctx_wait_for_idle(hwctx);

/* Request fw to destroy hwctx and cancel the rest pending requests */
+ drm_sched_stop(&hwctx->priv->sched, NULL);
aie2_release_resource(hwctx);
+ hwctx->status = HWCTX_STAT_STOP;
+ drm_sched_start(&hwctx->priv->sched, 0);

mutex_unlock(&xdna->dev_lock);
drm_sched_entity_destroy(&hwctx->priv->entity);
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c
index 6634a4d5717f..a80c77a478bf 100644
--- a/drivers/accel/amdxdna/amdxdna_mailbox.c
+++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
@@ -206,26 +206,34 @@ mailbox_send_msg(struct mailbox_channel *mb_chann, struct mailbox_msg *mb_msg)
u32 head, tail;
u32 start_addr;
u32 tmp_tail;
+ int ret;

head = mailbox_get_headptr(mb_chann, CHAN_RES_X2I);
tail = mb_chann->x2i_tail;
- ringbuf_size = mailbox_get_ringbuf_size(mb_chann, CHAN_RES_X2I);
+ ringbuf_size = mailbox_get_ringbuf_size(mb_chann, CHAN_RES_X2I) - sizeof(u32);
start_addr = mb_chann->res[CHAN_RES_X2I].rb_start_addr;
tmp_tail = tail + mb_msg->pkg_size;

- if (tail < head && tmp_tail >= head)
- goto no_space;
-
- if (tail >= head && (tmp_tail > ringbuf_size - sizeof(u32) &&
- mb_msg->pkg_size >= head))
- goto no_space;

- if (tail >= head && tmp_tail > ringbuf_size - sizeof(u32)) {
+check_again:
+ if (tail >= head && tmp_tail > ringbuf_size) {
write_addr = mb_chann->mb->res.ringbuf_base + start_addr + tail;
writel(TOMBSTONE, write_addr);

/* tombstone is set. Write from the start of the ringbuf */
tail = 0;
+ tmp_tail = tail + mb_msg->pkg_size;
+ }
+
+ if (tail < head && tmp_tail >= head) {
+ ret = read_poll_timeout(mailbox_get_headptr, head,
+ tmp_tail < head || tail >= head,
+ 1, 100, false, mb_chann, CHAN_RES_X2I);
+ if (ret)
+ return ret;
+
+ if (tail >= head)
+ goto check_again;
}

write_addr = mb_chann->mb->res.ringbuf_base + start_addr + tail;
@@ -237,9 +245,6 @@ mailbox_send_msg(struct mailbox_channel *mb_chann, struct mailbox_msg *mb_msg)
mb_msg->pkg.header.id);

return 0;
-
-no_space:
- return -ENOSPC;
}

static int
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index 569cd703729d..88643e28af84 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -88,6 +88,8 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
ret = -ENODEV;
goto unbind_sva;
}
+ client->mm = current->mm;
+ mmgrab(client->mm);
init_srcu_struct(&client->hwctx_srcu);
xa_init_flags(&client->hwctx_xa, XA_FLAGS_ALLOC);
mutex_init(&client->mm_lock);
@@ -127,6 +129,7 @@ static void amdxdna_drm_close(struct drm_device *ddev, struct drm_file *filp)
drm_gem_object_put(to_gobj(client->dev_heap));

iommu_sva_unbind_device(client->sva);
+ mmdrop(client->mm);

XDNA_DBG(xdna, "pid %d closed", client->pid);
kfree(client);
@@ -292,7 +295,7 @@ static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id)
fs_reclaim_release(GFP_KERNEL);
}

- xdna->notifier_wq = alloc_ordered_workqueue("notifier_wq", 0);
+ xdna->notifier_wq = alloc_ordered_workqueue("notifier_wq", WQ_MEM_RECLAIM);
if (!xdna->notifier_wq)
return -ENOMEM;

diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
index 72d6696d49da..64009ca24982 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -128,6 +128,7 @@ struct amdxdna_client {

struct iommu_sva *sva;
int pasid;
+ struct mm_struct *mm;
};

#define amdxdna_for_each_hwctx(client, hwctx_id, entry) \
diff --git a/drivers/accel/amdxdna/amdxdna_ubuf.c b/drivers/accel/amdxdna/amdxdna_ubuf.c
index 077b2261cf2a..9e3b3b055caa 100644
--- a/drivers/accel/amdxdna/amdxdna_ubuf.c
+++ b/drivers/accel/amdxdna/amdxdna_ubuf.c
@@ -34,15 +34,21 @@ static struct sg_table *amdxdna_ubuf_map(struct dma_buf_attachment *attach,
ret = sg_alloc_table_from_pages(sg, ubuf->pages, ubuf->nr_pages, 0,
ubuf->nr_pages << PAGE_SHIFT, GFP_KERNEL);
if (ret)
- return ERR_PTR(ret);
+ goto err_free_sg;

if (ubuf->flags & AMDXDNA_UBUF_FLAG_MAP_DMA) {
ret = dma_map_sgtable(attach->dev, sg, direction, 0);
if (ret)
- return ERR_PTR(ret);
+ goto err_free_table;
}

return sg;
+
+err_free_table:
+ sg_free_table(sg);
+err_free_sg:
+ kfree(sg);
+ return ERR_PTR(ret);
}

static void amdxdna_ubuf_unmap(struct dma_buf_attachment *attach,
diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c
index fa3475da7ea9..b6198f73c81d 100644
--- a/drivers/acpi/acpica/evregion.c
+++ b/drivers/acpi/acpica/evregion.c
@@ -163,7 +163,9 @@ acpi_ev_address_space_dispatch(union acpi_operand_object *region_obj,
return_ACPI_STATUS(AE_NOT_EXIST);
}

- if (region_obj->region.space_id == ACPI_ADR_SPACE_PLATFORM_COMM) {
+ if (field_obj
+ && region_obj->region.space_id ==
+ ACPI_ADR_SPACE_PLATFORM_COMM) {
struct acpi_pcc_info *ctx =
handler_desc->address_space.context;

diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index e66e20d1f31b..b59b0100d03c 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -362,7 +362,7 @@ static int send_pcc_cmd(int pcc_ss_id, u16 cmd)
end:
if (cmd == CMD_WRITE) {
if (unlikely(ret)) {
- for_each_possible_cpu(i) {
+ for_each_online_cpu(i) {
struct cpc_desc *desc = per_cpu(cpc_desc_ptr, i);

if (!desc)
@@ -524,7 +524,7 @@ int acpi_get_psd_map(unsigned int cpu, struct cppc_cpudata *cpu_data)
else if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ANY)
cpu_data->shared_type = CPUFREQ_SHARED_TYPE_ANY;

- for_each_possible_cpu(i) {
+ for_each_online_cpu(i) {
if (i == cpu)
continue;

diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index 361a7721a6a8..7da5ae5594a7 100644
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -1113,6 +1113,19 @@ static const struct dmi_system_id dmi_leave_unused_power_resources_on[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "SATELLITE Click Mini L9W-B"),
},
},
+ {
+ /*
+ * THUNDEROBOT ZERO laptop: Due to its SSDT table bug, power
+ * resource 'PXP' will be shut down on initialization, making
+ * the NVMe #2 and the NVIDIA dGPU both unavailable (they're
+ * both controlled by 'PXP').
+ */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "THUNDEROBOT"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "ZERO"),
+ }
+
+ },
{}
};

diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c
index 65e779be64ff..7644de24d2fa 100644
--- a/drivers/acpi/processor_driver.c
+++ b/drivers/acpi/processor_driver.c
@@ -166,7 +166,7 @@ static int __acpi_processor_start(struct acpi_device *device)
if (result && !IS_ENABLED(CONFIG_ACPI_CPU_FREQ_PSS))
dev_dbg(&device->dev, "CPPC data invalid or not present\n");

- if (!cpuidle_get_driver() || cpuidle_get_driver() == &acpi_idle_driver)
+ if (cpuidle_get_driver() == &acpi_idle_driver)
acpi_processor_power_init(pr);

acpi_pss_perf_init(pr);
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 699919e4579e..b29067759cc2 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -5552,6 +5552,7 @@ struct ata_port *ata_port_alloc(struct ata_host *host)
mutex_init(&ap->scsi_scan_mutex);
INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug);
INIT_DELAYED_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan);
+ INIT_WORK(&ap->deferred_qc_work, ata_scsi_deferred_qc_work);
INIT_LIST_HEAD(&ap->eh_done_q);
init_waitqueue_head(&ap->eh_wait_q);
init_completion(&ap->park_req_pending);
@@ -6172,9 +6173,11 @@ static void ata_port_detach(struct ata_port *ap)
/* wait till EH commits suicide */
ata_port_wait_eh(ap);

- /* it better be dead now */
+ /* It better be dead now and not have any remaining deferred qc. */
WARN_ON(!(ap->pflags & ATA_PFLAG_UNLOADED));
+ WARN_ON(ap->deferred_qc);

+ cancel_work_sync(&ap->deferred_qc_work);
cancel_delayed_work_sync(&ap->hotplug_task);
cancel_delayed_work_sync(&ap->scsi_rescan_task);

diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 2586e77ebf45..258e657f3527 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -640,12 +640,28 @@ void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap,
set_host_byte(scmd, DID_OK);

ata_qc_for_each_raw(ap, qc, i) {
- if (qc->flags & ATA_QCFLAG_ACTIVE &&
- qc->scsicmd == scmd)
+ if (qc->scsicmd != scmd)
+ continue;
+ if ((qc->flags & ATA_QCFLAG_ACTIVE) ||
+ qc == ap->deferred_qc)
break;
}

- if (i < ATA_MAX_QUEUE) {
+ if (qc == ap->deferred_qc) {
+ /*
+ * This is a deferred command that timed out while
+ * waiting for the command queue to drain. Since the qc
+ * is not active yet (deferred_qc is still set, so the
+ * deferred qc work has not issued the command yet),
+ * simply signal the timeout by finishing the SCSI
+ * command and clear the deferred qc to prevent the
+ * deferred qc work from issuing this qc.
+ */
+ WARN_ON_ONCE(qc->flags & ATA_QCFLAG_ACTIVE);
+ ap->deferred_qc = NULL;
+ set_host_byte(scmd, DID_TIME_OUT);
+ scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
+ } else if (i < ATA_MAX_QUEUE) {
/* the scmd has an associated qc */
if (!(qc->flags & ATA_QCFLAG_EH)) {
/* which hasn't failed yet, timeout */
@@ -917,6 +933,12 @@ static void ata_eh_set_pending(struct ata_port *ap, bool fastdrain)

ap->pflags |= ATA_PFLAG_EH_PENDING;

+ /*
+ * If we have a deferred qc, requeue it so that it is retried once EH
+ * completes.
+ */
+ ata_scsi_requeue_deferred_qc(ap);
+
if (!fastdrain)
return;

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 434774e71fe6..27ad14599605 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1657,8 +1657,77 @@ static void ata_qc_done(struct ata_queued_cmd *qc)
done(cmd);
}

+void ata_scsi_deferred_qc_work(struct work_struct *work)
+{
+ struct ata_port *ap =
+ container_of(work, struct ata_port, deferred_qc_work);
+ struct ata_queued_cmd *qc;
+ unsigned long flags;
+
+ spin_lock_irqsave(ap->lock, flags);
+
+ /*
+ * If we still have a deferred qc and we are not in EH, issue it. In
+ * such case, we should not need any more deferring the qc, so warn if
+ * qc_defer() says otherwise.
+ */
+ qc = ap->deferred_qc;
+ if (qc && !ata_port_eh_scheduled(ap)) {
+ WARN_ON_ONCE(ap->ops->qc_defer(qc));
+ ap->deferred_qc = NULL;
+ ata_qc_issue(qc);
+ }
+
+ spin_unlock_irqrestore(ap->lock, flags);
+}
+
+void ata_scsi_requeue_deferred_qc(struct ata_port *ap)
+{
+ struct ata_queued_cmd *qc = ap->deferred_qc;
+ struct scsi_cmnd *scmd;
+
+ lockdep_assert_held(ap->lock);
+
+ /*
+ * If we have a deferred qc when a reset occurs or NCQ commands fail,
+ * do not try to be smart about what to do with this deferred command
+ * and simply retry it by completing it with DID_SOFT_ERROR.
+ */
+ if (!qc)
+ return;
+
+ scmd = qc->scsicmd;
+ ap->deferred_qc = NULL;
+ ata_qc_free(qc);
+ scmd->result = (DID_SOFT_ERROR << 16);
+ scsi_done(scmd);
+}
+
+static void ata_scsi_schedule_deferred_qc(struct ata_port *ap)
+{
+ struct ata_queued_cmd *qc = ap->deferred_qc;
+
+ lockdep_assert_held(ap->lock);
+
+ /*
+ * If we have a deferred qc, then qc_defer() is defined and we can use
+ * this callback to determine if this qc is good to go, unless EH has
+ * been scheduled.
+ */
+ if (!qc)
+ return;
+
+ if (ata_port_eh_scheduled(ap)) {
+ ata_scsi_requeue_deferred_qc(ap);
+ return;
+ }
+ if (!ap->ops->qc_defer(qc))
+ queue_work(system_highpri_wq, &ap->deferred_qc_work);
+}
+
static void ata_scsi_qc_complete(struct ata_queued_cmd *qc)
{
+ struct ata_port *ap = qc->ap;
struct scsi_cmnd *cmd = qc->scsicmd;
u8 *cdb = cmd->cmnd;
bool have_sense = qc->flags & ATA_QCFLAG_SENSE_VALID;
@@ -1688,6 +1757,66 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc)
}

ata_qc_done(qc);
+
+ ata_scsi_schedule_deferred_qc(ap);
+}
+
+static int ata_scsi_qc_issue(struct ata_port *ap, struct ata_queued_cmd *qc)
+{
+ int ret;
+
+ if (!ap->ops->qc_defer)
+ goto issue;
+
+ /*
+ * If we already have a deferred qc, then rely on the SCSI layer to
+ * requeue and defer all incoming commands until the deferred qc is
+ * processed, once all on-going commands complete.
+ */
+ if (ap->deferred_qc) {
+ ata_qc_free(qc);
+ return SCSI_MLQUEUE_DEVICE_BUSY;
+ }
+
+ /* Check if the command needs to be deferred. */
+ ret = ap->ops->qc_defer(qc);
+ switch (ret) {
+ case 0:
+ break;
+ case ATA_DEFER_LINK:
+ ret = SCSI_MLQUEUE_DEVICE_BUSY;
+ break;
+ case ATA_DEFER_PORT:
+ ret = SCSI_MLQUEUE_HOST_BUSY;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ ret = SCSI_MLQUEUE_HOST_BUSY;
+ break;
+ }
+
+ if (ret) {
+ /*
+ * We must defer this qc: if this is not an NCQ command, keep
+ * this qc as a deferred one and report to the SCSI layer that
+ * we issued it so that it is not requeued. The deferred qc will
+ * be issued with the port deferred_qc_work once all on-going
+ * commands complete.
+ */
+ if (!ata_is_ncq(qc->tf.protocol)) {
+ ap->deferred_qc = qc;
+ return 0;
+ }
+
+ /* Force a requeue of the command to defer its execution. */
+ ata_qc_free(qc);
+ return ret;
+ }
+
+issue:
+ ata_qc_issue(qc);
+
+ return 0;
}

/**
@@ -1713,66 +1842,49 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc)
* spin_lock_irqsave(host lock)
*
* RETURNS:
- * 0 on success, SCSI_ML_QUEUE_DEVICE_BUSY if the command
- * needs to be deferred.
+ * 0 on success, SCSI_ML_QUEUE_DEVICE_BUSY or SCSI_MLQUEUE_HOST_BUSY if the
+ * command needs to be deferred.
*/
static int ata_scsi_translate(struct ata_device *dev, struct scsi_cmnd *cmd,
ata_xlat_func_t xlat_func)
{
struct ata_port *ap = dev->link->ap;
struct ata_queued_cmd *qc;
- int rc;

+ lockdep_assert_held(ap->lock);
+
+ /*
+ * ata_scsi_qc_new() calls scsi_done(cmd) in case of failure. So we
+ * have nothing further to do when allocating a qc fails.
+ */
qc = ata_scsi_qc_new(dev, cmd);
if (!qc)
- goto err_mem;
+ return 0;

/* data is present; dma-map it */
if (cmd->sc_data_direction == DMA_FROM_DEVICE ||
cmd->sc_data_direction == DMA_TO_DEVICE) {
if (unlikely(scsi_bufflen(cmd) < 1)) {
ata_dev_warn(dev, "WARNING: zero len r/w req\n");
- goto err_did;
+ cmd->result = (DID_ERROR << 16);
+ goto done;
}

ata_sg_init(qc, scsi_sglist(cmd), scsi_sg_count(cmd));
-
qc->dma_dir = cmd->sc_data_direction;
}

qc->complete_fn = ata_scsi_qc_complete;

if (xlat_func(qc))
- goto early_finish;
+ goto done;

- if (ap->ops->qc_defer) {
- if ((rc = ap->ops->qc_defer(qc)))
- goto defer;
- }
-
- /* select device, send command to hardware */
- ata_qc_issue(qc);
+ return ata_scsi_qc_issue(ap, qc);

- return 0;
-
-early_finish:
- ata_qc_free(qc);
- scsi_done(cmd);
- return 0;
-
-err_did:
+done:
ata_qc_free(qc);
- cmd->result = (DID_ERROR << 16);
scsi_done(cmd);
-err_mem:
return 0;
-
-defer:
- ata_qc_free(qc);
- if (rc == ATA_DEFER_LINK)
- return SCSI_MLQUEUE_DEVICE_BUSY;
- else
- return SCSI_MLQUEUE_HOST_BUSY;
}

/**
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index e5b977a8d3e1..612fe5982818 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -161,6 +161,8 @@ void ata_scsi_sdev_config(struct scsi_device *sdev);
int ata_scsi_dev_config(struct scsi_device *sdev, struct queue_limits *lim,
struct ata_device *dev);
int __ata_scsi_queuecmd(struct scsi_cmnd *scmd, struct ata_device *dev);
+void ata_scsi_deferred_qc_work(struct work_struct *work);
+void ata_scsi_requeue_deferred_qc(struct ata_port *ap);

/* libata-eh.c */
extern unsigned int ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd);
diff --git a/drivers/ata/pata_ftide010.c b/drivers/ata/pata_ftide010.c
index c3a8384c3e04..c41da296eb38 100644
--- a/drivers/ata/pata_ftide010.c
+++ b/drivers/ata/pata_ftide010.c
@@ -122,10 +122,10 @@ static const u8 mwdma_50_active_time[3] = {6, 2, 2};
static const u8 mwdma_50_recovery_time[3] = {6, 2, 1};
static const u8 mwdma_66_active_time[3] = {8, 3, 3};
static const u8 mwdma_66_recovery_time[3] = {8, 2, 1};
-static const u8 udma_50_setup_time[6] = {3, 3, 2, 2, 1, 1};
+static const u8 udma_50_setup_time[6] = {3, 3, 2, 2, 1, 9};
static const u8 udma_50_hold_time[6] = {3, 1, 1, 1, 1, 1};
-static const u8 udma_66_setup_time[7] = {4, 4, 3, 2, };
-static const u8 udma_66_hold_time[7] = {};
+static const u8 udma_66_setup_time[7] = {4, 4, 3, 2, 1, 9, 9};
+static const u8 udma_66_hold_time[7] = {4, 2, 1, 1, 1, 1, 1};

/*
* We set 66 MHz for all MWDMA modes
diff --git a/drivers/auxdisplay/arm-charlcd.c b/drivers/auxdisplay/arm-charlcd.c
index a7eae99a48f7..4e22882f57c9 100644
--- a/drivers/auxdisplay/arm-charlcd.c
+++ b/drivers/auxdisplay/arm-charlcd.c
@@ -323,7 +323,7 @@ static int __init charlcd_probe(struct platform_device *pdev)
out_no_irq:
iounmap(lcd->virtbase);
out_no_memregion:
- release_mem_region(lcd->phybase, SZ_4K);
+ release_mem_region(lcd->phybase, lcd->physize);
out_no_resource:
kfree(lcd);
return ret;
diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c
index 8aa28c08b289..c0809d18fc54 100644
--- a/drivers/base/power/wakeirq.c
+++ b/drivers/base/power/wakeirq.c
@@ -83,13 +83,16 @@ EXPORT_SYMBOL_GPL(dev_pm_set_wake_irq);
*/
void dev_pm_clear_wake_irq(struct device *dev)
{
- struct wake_irq *wirq = dev->power.wakeirq;
+ struct wake_irq *wirq;
unsigned long flags;

- if (!wirq)
+ spin_lock_irqsave(&dev->power.lock, flags);
+ wirq = dev->power.wakeirq;
+ if (!wirq) {
+ spin_unlock_irqrestore(&dev->power.lock, flags);
return;
+ }

- spin_lock_irqsave(&dev->power.lock, flags);
device_wakeup_detach_irq(dev);
dev->power.wakeirq = NULL;
spin_unlock_irqrestore(&dev->power.lock, flags);
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index d1283ff1080b..2f630df16bfe 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -276,9 +276,7 @@ EXPORT_SYMBOL_GPL(wakeup_sources_read_unlock);
*/
struct wakeup_source *wakeup_sources_walk_start(void)
{
- struct list_head *ws_head = &wakeup_sources;
-
- return list_entry_rcu(ws_head->next, struct wakeup_source, entry);
+ return list_first_or_null_rcu(&wakeup_sources, struct wakeup_source, entry);
}
EXPORT_SYMBOL_GPL(wakeup_sources_walk_start);

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index c73376886e7a..1f6ac9202b66 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2659,9 +2659,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
* connect.
*/
.max_hw_sectors = DRBD_MAX_BIO_SIZE_SAFE >> 8,
- .features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA |
- BLK_FEAT_ROTATIONAL |
- BLK_FEAT_STABLE_WRITES,
};

device = minor_to_device(minor);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 91f3b8afb63c..b502038be0a9 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1296,6 +1296,8 @@ void drbd_reconsider_queue_parameters(struct drbd_device *device,
lim.max_segments = drbd_backing_dev_max_segments(device);
} else {
lim.max_segments = BLK_MAX_SEGMENTS;
+ lim.features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA |
+ BLK_FEAT_ROTATIONAL | BLK_FEAT_STABLE_WRITES;
}

lim.max_hw_sectors = new >> SECTOR_SHIFT;
@@ -1318,8 +1320,24 @@ void drbd_reconsider_queue_parameters(struct drbd_device *device,
lim.max_hw_discard_sectors = 0;
}

- if (bdev)
+ if (bdev) {
blk_stack_limits(&lim, &b->limits, 0);
+ /*
+ * blk_set_stacking_limits() cleared the features, and
+ * blk_stack_limits() may or may not have inherited
+ * BLK_FEAT_STABLE_WRITES from the backing device.
+ *
+ * DRBD always requires stable writes because:
+ * 1. The same bio data is read for both local disk I/O and
+ * network transmission. If the page changes mid-flight,
+ * the local and remote copies could diverge.
+ * 2. When data integrity is enabled, DRBD calculates a
+ * checksum before sending the data. If the page changes
+ * between checksum calculation and transmission, the
+ * receiver will detect a checksum mismatch.
+ */
+ lim.features |= BLK_FEAT_STABLE_WRITES;
+ }

/*
* If we can handle "zeroes" efficiently on the protocol, we want to do
diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c
index 2df8941a6b14..9b3fdc202e15 100644
--- a/drivers/block/rnbd/rnbd-srv.c
+++ b/drivers/block/rnbd/rnbd-srv.c
@@ -145,18 +145,30 @@ static int process_rdma(struct rnbd_srv_session *srv_sess,
priv->sess_dev = sess_dev;
priv->id = id;

- bio = bio_alloc(file_bdev(sess_dev->bdev_file), 1,
+ bio = bio_alloc(file_bdev(sess_dev->bdev_file), !!datalen,
rnbd_to_bio_flags(le32_to_cpu(msg->rw)), GFP_KERNEL);
- bio_add_virt_nofail(bio, data, datalen);
-
- bio->bi_opf = rnbd_to_bio_flags(le32_to_cpu(msg->rw));
- if (bio_has_data(bio) &&
- bio->bi_iter.bi_size != le32_to_cpu(msg->bi_size)) {
- rnbd_srv_err_rl(sess_dev, "Datalen mismatch: bio bi_size (%u), bi_size (%u)\n",
- bio->bi_iter.bi_size, msg->bi_size);
- err = -EINVAL;
- goto bio_put;
+ if (unlikely(!bio)) {
+ err = -ENOMEM;
+ goto put_sess_dev;
}
+
+ if (!datalen) {
+ /*
+ * For special requests like DISCARD and WRITE_ZEROES, the datalen is zero.
+ */
+ bio->bi_iter.bi_size = le32_to_cpu(msg->bi_size);
+ } else {
+ bio_add_virt_nofail(bio, data, datalen);
+ bio->bi_opf = rnbd_to_bio_flags(le32_to_cpu(msg->rw));
+ if (bio->bi_iter.bi_size != le32_to_cpu(msg->bi_size)) {
+ rnbd_srv_err_rl(sess_dev,
+ "Datalen mismatch: bio bi_size (%u), bi_size (%u)\n",
+ bio->bi_iter.bi_size, msg->bi_size);
+ err = -EINVAL;
+ goto bio_put;
+ }
+ }
+
bio->bi_end_io = rnbd_dev_bi_end_io;
bio->bi_private = priv;
bio->bi_iter.bi_sector = le64_to_cpu(msg->sector);
@@ -170,6 +182,7 @@ static int process_rdma(struct rnbd_srv_session *srv_sess,

bio_put:
bio_put(bio);
+put_sess_dev:
rnbd_put_sess_dev(sess_dev);
err:
kfree(priv);
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 4b6d7b785d7b..965460d4fc76 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -2526,11 +2526,11 @@ static int ublk_ch_uring_cmd_local(struct io_uring_cmd *cmd,
io->res = result;
req = ublk_fill_io_cmd(io, cmd);
ret = ublk_config_io_buf(ub, io, cmd, addr, &buf_idx);
+ if (buf_idx != UBLK_INVALID_BUF_IDX)
+ io_buffer_unregister_bvec(cmd, buf_idx, issue_flags);
compl = ublk_need_complete_req(ub, io);

/* can't touch 'ublk_io' any more */
- if (buf_idx != UBLK_INVALID_BUF_IDX)
- io_buffer_unregister_bvec(cmd, buf_idx, issue_flags);
if (req_op(req) == REQ_OP_ZONE_APPEND)
req->__sector = addr;
if (compl)
@@ -3841,10 +3841,10 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
if (issue_flags & IO_URING_F_NONBLOCK)
return -EAGAIN;

- ublk_ctrl_cmd_dump(cmd);
-
if (!(issue_flags & IO_URING_F_SQE128))
- goto out;
+ return -EINVAL;
+
+ ublk_ctrl_cmd_dump(cmd);

ret = ublk_check_cmd_op(cmd_op);
if (ret)
diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c
index f280bcc61bbf..c68a8de3025b 100644
--- a/drivers/bluetooth/btintel_pcie.c
+++ b/drivers/bluetooth/btintel_pcie.c
@@ -1430,11 +1430,6 @@ static void btintel_pcie_msix_rx_handle(struct btintel_pcie_data *data)
}
}

-static irqreturn_t btintel_pcie_msix_isr(int irq, void *data)
-{
- return IRQ_WAKE_THREAD;
-}
-
static inline bool btintel_pcie_is_rxq_empty(struct btintel_pcie_data *data)
{
return data->ia.cr_hia[BTINTEL_PCIE_RXQ_NUM] == data->ia.cr_tia[BTINTEL_PCIE_RXQ_NUM];
@@ -1536,9 +1531,9 @@ static int btintel_pcie_setup_irq(struct btintel_pcie_data *data)

err = devm_request_threaded_irq(&data->pdev->dev,
msix_entry->vector,
- btintel_pcie_msix_isr,
+ NULL,
btintel_pcie_irq_msix_handler,
- IRQF_SHARED,
+ IRQF_ONESHOT | IRQF_SHARED,
KBUILD_MODNAME,
msix_entry);
if (err) {
diff --git a/drivers/char/hw_random/airoha-trng.c b/drivers/char/hw_random/airoha-trng.c
index 1dbfa9505c21..9a648f6d9fd4 100644
--- a/drivers/char/hw_random/airoha-trng.c
+++ b/drivers/char/hw_random/airoha-trng.c
@@ -212,6 +212,7 @@ static int airoha_trng_probe(struct platform_device *pdev)
trng->rng.init = airoha_trng_init;
trng->rng.cleanup = airoha_trng_cleanup;
trng->rng.read = airoha_trng_read;
+ trng->rng.quality = 900;

ret = devm_hwrng_register(dev, &trng->rng);
if (ret) {
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 018316f54621..036de7294bbd 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -20,23 +20,25 @@
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/random.h>
+#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/uaccess.h>
+#include <linux/workqueue.h>

#define RNG_MODULE_NAME "hw_random"

#define RNG_BUFFER_SIZE (SMP_CACHE_BYTES < 32 ? 32 : SMP_CACHE_BYTES)

-static struct hwrng *current_rng;
+static struct hwrng __rcu *current_rng;
/* the current rng has been explicitly chosen by user via sysfs */
static int cur_rng_set_by_user;
static struct task_struct *hwrng_fill;
/* list of registered rngs */
static LIST_HEAD(rng_list);
-/* Protects rng_list and current_rng */
+/* Protects rng_list, hwrng_fill and updating on current_rng */
static DEFINE_MUTEX(rng_mutex);
/* Protects rng read functions, data_avail, rng_buffer and rng_fillbuf */
static DEFINE_MUTEX(reading_mutex);
@@ -64,18 +66,39 @@ static size_t rng_buffer_size(void)
return RNG_BUFFER_SIZE;
}

-static inline void cleanup_rng(struct kref *kref)
+static void cleanup_rng_work(struct work_struct *work)
{
- struct hwrng *rng = container_of(kref, struct hwrng, ref);
+ struct hwrng *rng = container_of(work, struct hwrng, cleanup_work);
+
+ /*
+ * Hold rng_mutex here so we serialize in case they set_current_rng
+ * on rng again immediately.
+ */
+ mutex_lock(&rng_mutex);
+
+ /* Skip if rng has been reinitialized. */
+ if (kref_read(&rng->ref)) {
+ mutex_unlock(&rng_mutex);
+ return;
+ }

if (rng->cleanup)
rng->cleanup(rng);

complete(&rng->cleanup_done);
+ mutex_unlock(&rng_mutex);
+}
+
+static inline void cleanup_rng(struct kref *kref)
+{
+ struct hwrng *rng = container_of(kref, struct hwrng, ref);
+
+ schedule_work(&rng->cleanup_work);
}

static int set_current_rng(struct hwrng *rng)
{
+ struct hwrng *old_rng;
int err;

BUG_ON(!mutex_is_locked(&rng_mutex));
@@ -84,8 +107,14 @@ static int set_current_rng(struct hwrng *rng)
if (err)
return err;

- drop_current_rng();
- current_rng = rng;
+ old_rng = rcu_dereference_protected(current_rng,
+ lockdep_is_held(&rng_mutex));
+ rcu_assign_pointer(current_rng, rng);
+
+ if (old_rng) {
+ synchronize_rcu();
+ kref_put(&old_rng->ref, cleanup_rng);
+ }

/* if necessary, start hwrng thread */
if (!hwrng_fill) {
@@ -101,47 +130,56 @@ static int set_current_rng(struct hwrng *rng)

static void drop_current_rng(void)
{
- BUG_ON(!mutex_is_locked(&rng_mutex));
- if (!current_rng)
+ struct hwrng *rng;
+
+ rng = rcu_dereference_protected(current_rng,
+ lockdep_is_held(&rng_mutex));
+ if (!rng)
return;

+ RCU_INIT_POINTER(current_rng, NULL);
+ synchronize_rcu();
+
+ if (hwrng_fill) {
+ kthread_stop(hwrng_fill);
+ hwrng_fill = NULL;
+ }
+
/* decrease last reference for triggering the cleanup */
- kref_put(&current_rng->ref, cleanup_rng);
- current_rng = NULL;
+ kref_put(&rng->ref, cleanup_rng);
}

-/* Returns ERR_PTR(), NULL or refcounted hwrng */
+/* Returns NULL or refcounted hwrng */
static struct hwrng *get_current_rng_nolock(void)
{
- if (current_rng)
- kref_get(&current_rng->ref);
+ struct hwrng *rng;
+
+ rng = rcu_dereference_protected(current_rng,
+ lockdep_is_held(&rng_mutex));
+ if (rng)
+ kref_get(&rng->ref);

- return current_rng;
+ return rng;
}

static struct hwrng *get_current_rng(void)
{
struct hwrng *rng;

- if (mutex_lock_interruptible(&rng_mutex))
- return ERR_PTR(-ERESTARTSYS);
+ rcu_read_lock();
+ rng = rcu_dereference(current_rng);
+ if (rng)
+ kref_get(&rng->ref);

- rng = get_current_rng_nolock();
+ rcu_read_unlock();

- mutex_unlock(&rng_mutex);
return rng;
}

static void put_rng(struct hwrng *rng)
{
- /*
- * Hold rng_mutex here so we serialize in case they set_current_rng
- * on rng again immediately.
- */
- mutex_lock(&rng_mutex);
if (rng)
kref_put(&rng->ref, cleanup_rng);
- mutex_unlock(&rng_mutex);
}

static int hwrng_init(struct hwrng *rng)
@@ -213,10 +251,6 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf,

while (size) {
rng = get_current_rng();
- if (IS_ERR(rng)) {
- err = PTR_ERR(rng);
- goto out;
- }
if (!rng) {
err = -ENODEV;
goto out;
@@ -303,7 +337,7 @@ static struct miscdevice rng_miscdev = {

static int enable_best_rng(void)
{
- struct hwrng *rng, *new_rng = NULL;
+ struct hwrng *rng, *cur_rng, *new_rng = NULL;
int ret = -ENODEV;

BUG_ON(!mutex_is_locked(&rng_mutex));
@@ -321,7 +355,9 @@ static int enable_best_rng(void)
new_rng = rng;
}

- ret = ((new_rng == current_rng) ? 0 : set_current_rng(new_rng));
+ cur_rng = rcu_dereference_protected(current_rng,
+ lockdep_is_held(&rng_mutex));
+ ret = ((new_rng == cur_rng) ? 0 : set_current_rng(new_rng));
if (!ret)
cur_rng_set_by_user = 0;

@@ -341,6 +377,9 @@ static ssize_t rng_current_store(struct device *dev,

if (sysfs_streq(buf, "")) {
err = enable_best_rng();
+ } else if (sysfs_streq(buf, "none")) {
+ cur_rng_set_by_user = 1;
+ drop_current_rng();
} else {
list_for_each_entry(rng, &rng_list, list) {
if (sysfs_streq(rng->name, buf)) {
@@ -368,8 +407,6 @@ static ssize_t rng_current_show(struct device *dev,
struct hwrng *rng;

rng = get_current_rng();
- if (IS_ERR(rng))
- return PTR_ERR(rng);

ret = sysfs_emit(buf, "%s\n", rng ? rng->name : "none");
put_rng(rng);
@@ -392,7 +429,7 @@ static ssize_t rng_available_show(struct device *dev,
strlcat(buf, rng->name, PAGE_SIZE);
strlcat(buf, " ", PAGE_SIZE);
}
- strlcat(buf, "\n", PAGE_SIZE);
+ strlcat(buf, "none\n", PAGE_SIZE);
mutex_unlock(&rng_mutex);

return strlen(buf);
@@ -413,8 +450,6 @@ static ssize_t rng_quality_show(struct device *dev,
struct hwrng *rng;

rng = get_current_rng();
- if (IS_ERR(rng))
- return PTR_ERR(rng);

if (!rng) /* no need to put_rng */
return -ENODEV;
@@ -429,6 +464,7 @@ static ssize_t rng_quality_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t len)
{
+ struct hwrng *rng;
u16 quality;
int ret = -EINVAL;

@@ -445,12 +481,13 @@ static ssize_t rng_quality_store(struct device *dev,
goto out;
}

- if (!current_rng) {
+ rng = rcu_dereference_protected(current_rng, lockdep_is_held(&rng_mutex));
+ if (!rng) {
ret = -ENODEV;
goto out;
}

- current_rng->quality = quality;
+ rng->quality = quality;
current_quality = quality; /* obsolete */

/* the best available RNG may have changed */
@@ -486,8 +523,20 @@ static int hwrng_fillfn(void *unused)
struct hwrng *rng;

rng = get_current_rng();
- if (IS_ERR(rng) || !rng)
+ if (!rng) {
+ /*
+ * Keep the task_struct alive until kthread_stop()
+ * is called to avoid UAF in drop_current_rng().
+ */
+ while (!kthread_should_stop()) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (!kthread_should_stop())
+ schedule();
+ }
+ set_current_state(TASK_RUNNING);
break;
+ }
+
mutex_lock(&reading_mutex);
rc = rng_get_data(rng, rng_fillbuf,
rng_buffer_size(), 1);
@@ -515,14 +564,13 @@ static int hwrng_fillfn(void *unused)
add_hwgenerator_randomness((void *)rng_fillbuf, rc,
entropy >> 10, true);
}
- hwrng_fill = NULL;
return 0;
}

int hwrng_register(struct hwrng *rng)
{
int err = -EINVAL;
- struct hwrng *tmp;
+ struct hwrng *cur_rng, *tmp;

if (!rng->name || (!rng->data_read && !rng->read))
goto out;
@@ -537,6 +585,7 @@ int hwrng_register(struct hwrng *rng)
}
list_add_tail(&rng->list, &rng_list);

+ INIT_WORK(&rng->cleanup_work, cleanup_rng_work);
init_completion(&rng->cleanup_done);
complete(&rng->cleanup_done);
init_completion(&rng->dying);
@@ -544,16 +593,19 @@ int hwrng_register(struct hwrng *rng)
/* Adjust quality field to always have a proper value */
rng->quality = min_t(u16, min_t(u16, default_quality, 1024), rng->quality ?: 1024);

- if (!current_rng ||
- (!cur_rng_set_by_user && rng->quality > current_rng->quality)) {
- /*
- * Set new rng as current as the new rng source
- * provides better entropy quality and was not
- * chosen by userspace.
- */
- err = set_current_rng(rng);
- if (err)
- goto out_unlock;
+ if (!cur_rng_set_by_user) {
+ cur_rng = rcu_dereference_protected(current_rng,
+ lockdep_is_held(&rng_mutex));
+ if (!cur_rng || rng->quality > cur_rng->quality) {
+ /*
+ * Set new rng as current as the new rng source
+ * provides better entropy quality and was not
+ * chosen by userspace.
+ */
+ err = set_current_rng(rng);
+ if (err)
+ goto out_unlock;
+ }
}
mutex_unlock(&rng_mutex);
return 0;
@@ -566,14 +618,17 @@ EXPORT_SYMBOL_GPL(hwrng_register);

void hwrng_unregister(struct hwrng *rng)
{
- struct hwrng *new_rng;
+ struct hwrng *cur_rng;
int err;

mutex_lock(&rng_mutex);

list_del(&rng->list);
complete_all(&rng->dying);
- if (current_rng == rng) {
+
+ cur_rng = rcu_dereference_protected(current_rng,
+ lockdep_is_held(&rng_mutex));
+ if (cur_rng == rng) {
err = enable_best_rng();
if (err) {
drop_current_rng();
@@ -581,17 +636,7 @@ void hwrng_unregister(struct hwrng *rng)
}
}

- new_rng = get_current_rng_nolock();
- if (list_empty(&rng_list)) {
- mutex_unlock(&rng_mutex);
- if (hwrng_fill)
- kthread_stop(hwrng_fill);
- } else
- mutex_unlock(&rng_mutex);
-
- if (new_rng)
- put_rng(new_rng);
-
+ mutex_unlock(&rng_mutex);
wait_for_completion(&rng->cleanup_done);
}
EXPORT_SYMBOL_GPL(hwrng_unregister);
@@ -679,7 +724,7 @@ static int __init hwrng_modinit(void)
static void __exit hwrng_modexit(void)
{
mutex_lock(&rng_mutex);
- BUG_ON(current_rng);
+ WARN_ON(rcu_access_pointer(current_rng));
kfree(rng_buffer);
kfree(rng_fillbuf);
mutex_unlock(&rng_mutex);
diff --git a/drivers/char/misc_minor_kunit.c b/drivers/char/misc_minor_kunit.c
index 6fc8b05169c5..e930c78e1ef9 100644
--- a/drivers/char/misc_minor_kunit.c
+++ b/drivers/char/misc_minor_kunit.c
@@ -166,7 +166,7 @@ static void __init miscdev_test_can_open(struct kunit *test, struct miscdevice *
KUNIT_FAIL(test, "failed to create node\n");

filp = filp_open(devname, O_RDONLY, 0);
- if (IS_ERR_OR_NULL(filp))
+ if (IS_ERR(filp))
KUNIT_FAIL(test, "failed to open misc device: %ld\n", PTR_ERR(filp));
else
fput(filp);
diff --git a/drivers/char/tpm/st33zp24/st33zp24.c b/drivers/char/tpm/st33zp24/st33zp24.c
index 2ed7815e4899..e2b7451ea7cc 100644
--- a/drivers/char/tpm/st33zp24/st33zp24.c
+++ b/drivers/char/tpm/st33zp24/st33zp24.c
@@ -328,8 +328,10 @@ static int st33zp24_send(struct tpm_chip *chip, unsigned char *buf,

for (i = 0; i < len - 1;) {
burstcnt = get_burstcount(chip);
- if (burstcnt < 0)
- return burstcnt;
+ if (burstcnt < 0) {
+ ret = burstcnt;
+ goto out_err;
+ }
size = min_t(int, len - i - 1, burstcnt);
ret = tpm_dev->ops->send(tpm_dev->phy_id, TPM_DATA_FIFO,
buf + i, size);
diff --git a/drivers/char/tpm/tpm_i2c_infineon.c b/drivers/char/tpm/tpm_i2c_infineon.c
index bdf1f329a679..8b7d32de0b2e 100644
--- a/drivers/char/tpm/tpm_i2c_infineon.c
+++ b/drivers/char/tpm/tpm_i2c_infineon.c
@@ -544,8 +544,10 @@ static int tpm_tis_i2c_send(struct tpm_chip *chip, u8 *buf, size_t bufsiz,
burstcnt = get_burstcount(chip);

/* burstcnt < 0 = TPM is busy */
- if (burstcnt < 0)
- return burstcnt;
+ if (burstcnt < 0) {
+ rc = burstcnt;
+ goto out_err;
+ }

if (burstcnt > (len - 1 - count))
burstcnt = len - 1 - count;
diff --git a/drivers/clk/actions/owl-composite.c b/drivers/clk/actions/owl-composite.c
index 00b74f8bc437..9540444307d6 100644
--- a/drivers/clk/actions/owl-composite.c
+++ b/drivers/clk/actions/owl-composite.c
@@ -57,15 +57,10 @@ static int owl_comp_div_determine_rate(struct clk_hw *hw,
struct clk_rate_request *req)
{
struct owl_composite *comp = hw_to_owl_comp(hw);
- long rate;
-
- rate = owl_divider_helper_round_rate(&comp->common, &comp->rate.div_hw,
- req->rate, &req->best_parent_rate);
- if (rate < 0)
- return rate;
+ struct owl_divider_hw *div = &comp->rate.div_hw;

- req->rate = rate;
- return 0;
+ return divider_determine_rate(&comp->common.hw, req, div->table,
+ div->width, div->div_flags);
}

static unsigned long owl_comp_div_recalc_rate(struct clk_hw *hw,
diff --git a/drivers/clk/actions/owl-divider.c b/drivers/clk/actions/owl-divider.c
index 118f1393c678..316ace80e87e 100644
--- a/drivers/clk/actions/owl-divider.c
+++ b/drivers/clk/actions/owl-divider.c
@@ -13,26 +13,13 @@

#include "owl-divider.h"

-long owl_divider_helper_round_rate(struct owl_clk_common *common,
- const struct owl_divider_hw *div_hw,
- unsigned long rate,
- unsigned long *parent_rate)
-{
- return divider_round_rate(&common->hw, rate, parent_rate,
- div_hw->table, div_hw->width,
- div_hw->div_flags);
-}
-
static int owl_divider_determine_rate(struct clk_hw *hw,
struct clk_rate_request *req)
{
struct owl_divider *div = hw_to_owl_divider(hw);

- req->rate = owl_divider_helper_round_rate(&div->common, &div->div_hw,
- req->rate,
- &req->best_parent_rate);
-
- return 0;
+ return divider_determine_rate(hw, req, div->div_hw.table,
+ div->div_hw.width, div->div_hw.div_flags);
}

unsigned long owl_divider_helper_recalc_rate(struct owl_clk_common *common,
diff --git a/drivers/clk/actions/owl-divider.h b/drivers/clk/actions/owl-divider.h
index 083be6d80954..2ba957740c38 100644
--- a/drivers/clk/actions/owl-divider.h
+++ b/drivers/clk/actions/owl-divider.h
@@ -56,11 +56,6 @@ static inline struct owl_divider *hw_to_owl_divider(const struct clk_hw *hw)
return container_of(common, struct owl_divider, common);
}

-long owl_divider_helper_round_rate(struct owl_clk_common *common,
- const struct owl_divider_hw *div_hw,
- unsigned long rate,
- unsigned long *parent_rate);
-
unsigned long owl_divider_helper_recalc_rate(struct owl_clk_common *common,
const struct owl_divider_hw *div_hw,
unsigned long parent_rate);
diff --git a/drivers/clk/clk-bm1880.c b/drivers/clk/clk-bm1880.c
index dac190bc6e19..d2617fe16d2e 100644
--- a/drivers/clk/clk-bm1880.c
+++ b/drivers/clk/clk-bm1880.c
@@ -629,10 +629,7 @@ static int bm1880_clk_div_determine_rate(struct clk_hw *hw,
return 0;
}

- req->rate = divider_round_rate(hw, req->rate, &req->best_parent_rate,
- div->table, div->width, div->flags);
-
- return 0;
+ return divider_determine_rate(hw, req, div->table, div->width, div->flags);
}

static int bm1880_clk_div_set_rate(struct clk_hw *hw, unsigned long rate,
diff --git a/drivers/clk/clk-loongson1.c b/drivers/clk/clk-loongson1.c
index f9f060d08a5f..1674181a1107 100644
--- a/drivers/clk/clk-loongson1.c
+++ b/drivers/clk/clk-loongson1.c
@@ -99,10 +99,7 @@ static int ls1x_divider_determine_rate(struct clk_hw *hw,
struct ls1x_clk *ls1x_clk = to_ls1x_clk(hw);
const struct ls1x_clk_div_data *d = ls1x_clk->data;

- req->rate = divider_round_rate(hw, req->rate, &req->best_parent_rate,
- d->table, d->width, d->flags);
-
- return 0;
+ return divider_determine_rate(hw, req, d->table, d->width, d->flags);
}

static int ls1x_divider_set_rate(struct clk_hw *hw, unsigned long rate,
diff --git a/drivers/clk/clk-milbeaut.c b/drivers/clk/clk-milbeaut.c
index b4f9b7143eaa..bb94d02a76cf 100644
--- a/drivers/clk/clk-milbeaut.c
+++ b/drivers/clk/clk-milbeaut.c
@@ -407,10 +407,7 @@ static int m10v_clk_divider_determine_rate(struct clk_hw *hw,
return 0;
}

- req->rate = divider_round_rate(hw, req->rate, &req->best_parent_rate,
- divider->table, divider->width, divider->flags);
-
- return 0;
+ return divider_determine_rate(hw, req, divider->table, divider->width, divider->flags);
}

static int m10v_clk_divider_set_rate(struct clk_hw *hw, unsigned long rate,
diff --git a/drivers/clk/clk-versaclock3.c b/drivers/clk/clk-versaclock3.c
index 1849863dbd67..27b6cf70f3ae 100644
--- a/drivers/clk/clk-versaclock3.c
+++ b/drivers/clk/clk-versaclock3.c
@@ -523,11 +523,8 @@ static int vc3_div_determine_rate(struct clk_hw *hw,
return 0;
}

- req->rate = divider_round_rate(hw, req->rate, &req->best_parent_rate,
- div_data->table,
- div_data->width, div_data->flags);
-
- return 0;
+ return divider_determine_rate(hw, req, div_data->table, div_data->width,
+ div_data->flags);
}

static int vc3_div_set_rate(struct clk_hw *hw, unsigned long rate,
diff --git a/drivers/clk/hisilicon/clkdivider-hi6220.c b/drivers/clk/hisilicon/clkdivider-hi6220.c
index 6bae18a84cb6..fd7ceb92d651 100644
--- a/drivers/clk/hisilicon/clkdivider-hi6220.c
+++ b/drivers/clk/hisilicon/clkdivider-hi6220.c
@@ -60,10 +60,8 @@ static int hi6220_clkdiv_determine_rate(struct clk_hw *hw,
{
struct hi6220_clk_divider *dclk = to_hi6220_clk_divider(hw);

- req->rate = divider_round_rate(hw, req->rate, &req->best_parent_rate, dclk->table,
- dclk->width, CLK_DIVIDER_ROUND_CLOSEST);
-
- return 0;
+ return divider_determine_rate(hw, req, dclk->table, dclk->width,
+ CLK_DIVIDER_ROUND_CLOSEST);
}

static int hi6220_clkdiv_set_rate(struct clk_hw *hw, unsigned long rate,
diff --git a/drivers/clk/mediatek/clk-mt7981-eth.c b/drivers/clk/mediatek/clk-mt7981-eth.c
index 906aec9ddff5..0655ebb6c561 100644
--- a/drivers/clk/mediatek/clk-mt7981-eth.c
+++ b/drivers/clk/mediatek/clk-mt7981-eth.c
@@ -31,7 +31,7 @@ static const struct mtk_gate_regs sgmii0_cg_regs = {
.ops = &mtk_clk_gate_ops_no_setclr_inv, \
}

-static const struct mtk_gate sgmii0_clks[] __initconst = {
+static const struct mtk_gate sgmii0_clks[] = {
GATE_SGMII0(CLK_SGM0_TX_EN, "sgm0_tx_en", "usb_tx250m", 2),
GATE_SGMII0(CLK_SGM0_RX_EN, "sgm0_rx_en", "usb_eq_rx250m", 3),
GATE_SGMII0(CLK_SGM0_CK0_EN, "sgm0_ck0_en", "usb_ln0", 4),
@@ -53,7 +53,7 @@ static const struct mtk_gate_regs sgmii1_cg_regs = {
.ops = &mtk_clk_gate_ops_no_setclr_inv, \
}

-static const struct mtk_gate sgmii1_clks[] __initconst = {
+static const struct mtk_gate sgmii1_clks[] = {
GATE_SGMII1(CLK_SGM1_TX_EN, "sgm1_tx_en", "usb_tx250m", 2),
GATE_SGMII1(CLK_SGM1_RX_EN, "sgm1_rx_en", "usb_eq_rx250m", 3),
GATE_SGMII1(CLK_SGM1_CK1_EN, "sgm1_ck1_en", "usb_ln0", 4),
@@ -75,7 +75,7 @@ static const struct mtk_gate_regs eth_cg_regs = {
.ops = &mtk_clk_gate_ops_no_setclr_inv, \
}

-static const struct mtk_gate eth_clks[] __initconst = {
+static const struct mtk_gate eth_clks[] = {
GATE_ETH(CLK_ETH_FE_EN, "eth_fe_en", "netsys_2x", 6),
GATE_ETH(CLK_ETH_GP2_EN, "eth_gp2_en", "sgm_325m", 7),
GATE_ETH(CLK_ETH_GP1_EN, "eth_gp1_en", "sgm_325m", 8),
diff --git a/drivers/clk/mediatek/clk-mt8196-mfg.c b/drivers/clk/mediatek/clk-mt8196-mfg.c
index ae1eb9de79ae..f40795b47ff1 100644
--- a/drivers/clk/mediatek/clk-mt8196-mfg.c
+++ b/drivers/clk/mediatek/clk-mt8196-mfg.c
@@ -58,24 +58,25 @@
.pcw_shift = _pcw_shift, \
.pcwbits = _pcwbits, \
.pcwibits = MT8196_INTEGER_BITS, \
+ .parent_name = "mfg_eb", \
}

static const struct mtk_pll_data mfg_ao_plls[] = {
- PLL(CLK_MFG_AO_MFGPLL, "mfgpll", MFGPLL_CON0, MFGPLL_CON0, 0, 0, 0,
- BIT(0), MFGPLL_CON1, 24, 0, 0, 0,
+ PLL(CLK_MFG_AO_MFGPLL, "mfgpll", MFGPLL_CON0, MFGPLL_CON0, 0, 0,
+ PLL_PARENT_EN, BIT(0), MFGPLL_CON1, 24, 0, 0, 0,
MFGPLL_CON1, 0, 22),
};

static const struct mtk_pll_data mfgsc0_ao_plls[] = {
PLL(CLK_MFGSC0_AO_MFGPLL_SC0, "mfgpll-sc0", MFGPLL_SC0_CON0,
- MFGPLL_SC0_CON0, 0, 0, 0, BIT(0), MFGPLL_SC0_CON1, 24, 0, 0, 0,
- MFGPLL_SC0_CON1, 0, 22),
+ MFGPLL_SC0_CON0, 0, 0, PLL_PARENT_EN, BIT(0), MFGPLL_SC0_CON1, 24,
+ 0, 0, 0, MFGPLL_SC0_CON1, 0, 22),
};

static const struct mtk_pll_data mfgsc1_ao_plls[] = {
PLL(CLK_MFGSC1_AO_MFGPLL_SC1, "mfgpll-sc1", MFGPLL_SC1_CON0,
- MFGPLL_SC1_CON0, 0, 0, 0, BIT(0), MFGPLL_SC1_CON1, 24, 0, 0, 0,
- MFGPLL_SC1_CON1, 0, 22),
+ MFGPLL_SC1_CON0, 0, 0, PLL_PARENT_EN, BIT(0), MFGPLL_SC1_CON1, 24,
+ 0, 0, 0, MFGPLL_SC1_CON1, 0, 22),
};

static const struct of_device_id of_match_clk_mt8196_mfg[] = {
diff --git a/drivers/clk/mediatek/clk-mt8516.c b/drivers/clk/mediatek/clk-mt8516.c
index 21eb052b0a53..342a59019fea 100644
--- a/drivers/clk/mediatek/clk-mt8516.c
+++ b/drivers/clk/mediatek/clk-mt8516.c
@@ -544,7 +544,7 @@ static const struct mtk_gate_regs top5_cg_regs = {
#define GATE_TOP5(_id, _name, _parent, _shift) \
GATE_MTK(_id, _name, _parent, &top5_cg_regs, _shift, &mtk_clk_gate_ops_setclr)

-static const struct mtk_gate top_clks[] __initconst = {
+static const struct mtk_gate top_clks[] = {
/* TOP1 */
GATE_TOP1(CLK_TOP_THEM, "them", "ahb_infra_sel", 1),
GATE_TOP1(CLK_TOP_APDMA, "apdma", "ahb_infra_sel", 2),
diff --git a/drivers/clk/mediatek/clk-mtk.c b/drivers/clk/mediatek/clk-mtk.c
index 19cd27941747..deafe55a96cb 100644
--- a/drivers/clk/mediatek/clk-mtk.c
+++ b/drivers/clk/mediatek/clk-mtk.c
@@ -497,14 +497,16 @@ static int __mtk_clk_simple_probe(struct platform_device *pdev,


if (mcd->need_runtime_pm) {
- devm_pm_runtime_enable(&pdev->dev);
+ r = devm_pm_runtime_enable(&pdev->dev);
+ if (r)
+ goto unmap_io;
/*
* Do a pm_runtime_resume_and_get() to workaround a possible
* deadlock between clk_register() and the genpd framework.
*/
r = pm_runtime_resume_and_get(&pdev->dev);
if (r)
- return r;
+ goto unmap_io;
}

/* Calculate how many clk_hw_onecell_data entries to allocate */
@@ -618,11 +620,11 @@ static int __mtk_clk_simple_probe(struct platform_device *pdev,
free_data:
mtk_free_clk_data(clk_data);
free_base:
- if (mcd->shared_io && base)
- iounmap(base);
-
if (mcd->need_runtime_pm)
pm_runtime_put(&pdev->dev);
+unmap_io:
+ if (mcd->shared_io && base)
+ iounmap(base);
return r;
}

diff --git a/drivers/clk/mediatek/clk-pll.c b/drivers/clk/mediatek/clk-pll.c
index cd2b6ce551c6..de3eb0267055 100644
--- a/drivers/clk/mediatek/clk-pll.c
+++ b/drivers/clk/mediatek/clk-pll.c
@@ -358,6 +358,9 @@ struct clk_hw *mtk_clk_register_pll_ops(struct mtk_clk_pll *pll,

init.name = data->name;
init.flags = (data->flags & PLL_AO) ? CLK_IS_CRITICAL : 0;
+ if (data->flags & PLL_PARENT_EN)
+ init.flags |= CLK_OPS_PARENT_ENABLE;
+
init.ops = pll_ops;
if (data->parent_name)
init.parent_names = &data->parent_name;
diff --git a/drivers/clk/mediatek/clk-pll.h b/drivers/clk/mediatek/clk-pll.h
index d71c150ce83e..de5a8fb7cbcf 100644
--- a/drivers/clk/mediatek/clk-pll.h
+++ b/drivers/clk/mediatek/clk-pll.h
@@ -21,6 +21,7 @@ struct mtk_pll_div_table {

#define HAVE_RST_BAR BIT(0)
#define PLL_AO BIT(1)
+#define PLL_PARENT_EN BIT(2)
#define POSTDIV_MASK GENMASK(2, 0)

struct mtk_pll_data {
diff --git a/drivers/clk/meson/g12a.c b/drivers/clk/meson/g12a.c
index 185b6348251d..d0d4c7b6dc82 100644
--- a/drivers/clk/meson/g12a.c
+++ b/drivers/clk/meson/g12a.c
@@ -777,12 +777,23 @@ static struct clk_regmap g12a_hdmi_pll_dco = {
},
};

+/*
+ * G12/SM1 hdmi OD dividers are POWER_OF_TWO dividers but limited to /4.
+ * A divider value of 3 should map to /8 but instead maps to /4, so ignore it.
+ */
+static const struct clk_div_table g12a_hdmi_pll_od_div_table[] = {
+ { .val = 0, .div = 1 },
+ { .val = 1, .div = 2 },
+ { .val = 2, .div = 4 },
+ { /* sentinel */ }
+};
+
static struct clk_regmap g12a_hdmi_pll_od = {
.data = &(struct clk_regmap_div_data){
.offset = HHI_HDMI_PLL_CNTL0,
.shift = 16,
.width = 2,
- .flags = CLK_DIVIDER_POWER_OF_TWO,
+ .table = g12a_hdmi_pll_od_div_table,
},
.hw.init = &(struct clk_init_data){
.name = "hdmi_pll_od",
@@ -800,7 +811,7 @@ static struct clk_regmap g12a_hdmi_pll_od2 = {
.offset = HHI_HDMI_PLL_CNTL0,
.shift = 18,
.width = 2,
- .flags = CLK_DIVIDER_POWER_OF_TWO,
+ .table = g12a_hdmi_pll_od_div_table,
},
.hw.init = &(struct clk_init_data){
.name = "hdmi_pll_od2",
@@ -818,7 +829,7 @@ static struct clk_regmap g12a_hdmi_pll = {
.offset = HHI_HDMI_PLL_CNTL0,
.shift = 20,
.width = 2,
- .flags = CLK_DIVIDER_POWER_OF_TWO,
+ .table = g12a_hdmi_pll_od_div_table,
},
.hw.init = &(struct clk_init_data){
.name = "hdmi_pll",
diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c
index 5a229c4ffae1..ec9a3414875a 100644
--- a/drivers/clk/meson/gxbb.c
+++ b/drivers/clk/meson/gxbb.c
@@ -349,12 +349,23 @@ static struct clk_regmap gxbb_hdmi_pll = {
},
};

+/*
+ * GXL hdmi OD dividers are POWER_OF_TWO dividers but limited to /4.
+ * A divider value of 3 should map to /8 but instead maps to /4, so ignore it.
+ */
+static const struct clk_div_table gxl_hdmi_pll_od_div_table[] = {
+ { .val = 0, .div = 1 },
+ { .val = 1, .div = 2 },
+ { .val = 2, .div = 4 },
+ { /* sentinel */ }
+};
+
static struct clk_regmap gxl_hdmi_pll_od = {
.data = &(struct clk_regmap_div_data){
.offset = HHI_HDMI_PLL_CNTL + 8,
.shift = 21,
.width = 2,
- .flags = CLK_DIVIDER_POWER_OF_TWO,
+ .table = gxl_hdmi_pll_od_div_table,
},
.hw.init = &(struct clk_init_data){
.name = "hdmi_pll_od",
@@ -372,7 +383,7 @@ static struct clk_regmap gxl_hdmi_pll_od2 = {
.offset = HHI_HDMI_PLL_CNTL + 8,
.shift = 23,
.width = 2,
- .flags = CLK_DIVIDER_POWER_OF_TWO,
+ .table = gxl_hdmi_pll_od_div_table,
},
.hw.init = &(struct clk_init_data){
.name = "hdmi_pll_od2",
@@ -390,7 +401,7 @@ static struct clk_regmap gxl_hdmi_pll = {
.offset = HHI_HDMI_PLL_CNTL + 8,
.shift = 19,
.width = 2,
- .flags = CLK_DIVIDER_POWER_OF_TWO,
+ .table = gxl_hdmi_pll_od_div_table,
},
.hw.init = &(struct clk_init_data){
.name = "hdmi_pll",
diff --git a/drivers/clk/microchip/clk-core.c b/drivers/clk/microchip/clk-core.c
index b34348d491f3..a0163441dfe5 100644
--- a/drivers/clk/microchip/clk-core.c
+++ b/drivers/clk/microchip/clk-core.c
@@ -780,15 +780,6 @@ static unsigned long sclk_get_rate(struct clk_hw *hw, unsigned long parent_rate)
return parent_rate / div;
}

-static int sclk_determine_rate(struct clk_hw *hw,
- struct clk_rate_request *req)
-{
- req->rate = calc_best_divided_rate(req->rate, req->best_parent_rate,
- SLEW_SYSDIV, 1);
-
- return 0;
-}
-
static int sclk_set_rate(struct clk_hw *hw,
unsigned long rate, unsigned long parent_rate)
{
@@ -912,7 +903,6 @@ static int sclk_init(struct clk_hw *hw)
const struct clk_ops pic32_sclk_ops = {
.get_parent = sclk_get_parent,
.set_parent = sclk_set_parent,
- .determine_rate = sclk_determine_rate,
.set_rate = sclk_set_rate,
.recalc_rate = sclk_get_rate,
.init = sclk_init,
diff --git a/drivers/clk/nuvoton/clk-ma35d1-divider.c b/drivers/clk/nuvoton/clk-ma35d1-divider.c
index e39f53d5bf45..e992e7c30341 100644
--- a/drivers/clk/nuvoton/clk-ma35d1-divider.c
+++ b/drivers/clk/nuvoton/clk-ma35d1-divider.c
@@ -44,11 +44,8 @@ static int ma35d1_clkdiv_determine_rate(struct clk_hw *hw,
{
struct ma35d1_adc_clk_div *dclk = to_ma35d1_adc_clk_div(hw);

- req->rate = divider_round_rate(hw, req->rate, &req->best_parent_rate,
- dclk->table, dclk->width,
- CLK_DIVIDER_ROUND_CLOSEST);
-
- return 0;
+ return divider_determine_rate(hw, req, dclk->table, dclk->width,
+ CLK_DIVIDER_ROUND_CLOSEST);
}

static int ma35d1_clkdiv_set_rate(struct clk_hw *hw, unsigned long rate, unsigned long parent_rate)
diff --git a/drivers/clk/nxp/clk-lpc32xx.c b/drivers/clk/nxp/clk-lpc32xx.c
index 23f980cf6a2b..ae2fa5341a2e 100644
--- a/drivers/clk/nxp/clk-lpc32xx.c
+++ b/drivers/clk/nxp/clk-lpc32xx.c
@@ -975,10 +975,8 @@ static int clk_divider_determine_rate(struct clk_hw *hw,
return 0;
}

- req->rate = divider_round_rate(hw, req->rate, &req->best_parent_rate,
- divider->table, divider->width, divider->flags);
-
- return 0;
+ return divider_determine_rate(hw, req, divider->table, divider->width,
+ divider->flags);
}

static int clk_divider_set_rate(struct clk_hw *hw, unsigned long rate,
diff --git a/drivers/clk/qcom/clk-alpha-pll.c b/drivers/clk/qcom/clk-alpha-pll.c
index 6aeba40358c1..a84e8bee6534 100644
--- a/drivers/clk/qcom/clk-alpha-pll.c
+++ b/drivers/clk/qcom/clk-alpha-pll.c
@@ -1257,11 +1257,8 @@ static int clk_alpha_pll_postdiv_determine_rate(struct clk_hw *hw,
else
table = clk_alpha_div_table;

- req->rate = divider_round_rate(hw, req->rate, &req->best_parent_rate,
- table, pll->width,
- CLK_DIVIDER_POWER_OF_TWO);
-
- return 0;
+ return divider_determine_rate(hw, req, table, pll->width,
+ CLK_DIVIDER_POWER_OF_TWO);
}

static int clk_alpha_pll_postdiv_ro_determine_rate(struct clk_hw *hw,
@@ -1617,11 +1614,8 @@ static int clk_trion_pll_postdiv_determine_rate(struct clk_hw *hw,
{
struct clk_alpha_pll_postdiv *pll = to_clk_alpha_pll_postdiv(hw);

- req->rate = divider_round_rate(hw, req->rate, &req->best_parent_rate,
- pll->post_div_table,
- pll->width, CLK_DIVIDER_ROUND_CLOSEST);
-
- return 0;
+ return divider_determine_rate(hw, req, pll->post_div_table, pll->width,
+ CLK_DIVIDER_ROUND_CLOSEST);
};

static int
@@ -1657,11 +1651,8 @@ static int clk_alpha_pll_postdiv_fabia_determine_rate(struct clk_hw *hw,
{
struct clk_alpha_pll_postdiv *pll = to_clk_alpha_pll_postdiv(hw);

- req->rate = divider_round_rate(hw, req->rate, &req->best_parent_rate,
- pll->post_div_table,
- pll->width, CLK_DIVIDER_ROUND_CLOSEST);
-
- return 0;
+ return divider_determine_rate(hw, req, pll->post_div_table, pll->width,
+ CLK_DIVIDER_ROUND_CLOSEST);
}

static int clk_alpha_pll_postdiv_fabia_set_rate(struct clk_hw *hw,
diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c
index e18cb8807d73..d0a5847f9111 100644
--- a/drivers/clk/qcom/clk-rcg2.c
+++ b/drivers/clk/qcom/clk-rcg2.c
@@ -755,7 +755,7 @@ static int clk_rcg2_get_duty_cycle(struct clk_hw *hw, struct clk_duty *duty)
static int clk_rcg2_set_duty_cycle(struct clk_hw *hw, struct clk_duty *duty)
{
struct clk_rcg2 *rcg = to_clk_rcg2(hw);
- u32 notn_m, n, m, d, not2d, mask, duty_per, cfg;
+ u32 notn_m, n, m, d, not2d, mask, cfg;
int ret;

/* Duty-cycle cannot be modified for non-MND RCGs */
@@ -774,10 +774,8 @@ static int clk_rcg2_set_duty_cycle(struct clk_hw *hw, struct clk_duty *duty)

n = (~(notn_m) + m) & mask;

- duty_per = (duty->num * 100) / duty->den;
-
/* Calculate 2d value */
- d = DIV_ROUND_CLOSEST(n * duty_per * 2, 100);
+ d = DIV_ROUND_CLOSEST(n * duty->num * 2, duty->den);

/*
* Check bit widths of 2d. If D is too big reduce duty cycle.
@@ -1266,6 +1264,7 @@ static int clk_gfx3d_determine_rate(struct clk_hw *hw,
if (req->max_rate < parent_req.max_rate)
parent_req.max_rate = req->max_rate;

+ parent_req.best_parent_hw = req->best_parent_hw;
ret = __clk_determine_rate(req->best_parent_hw, &parent_req);
if (ret)
return ret;
diff --git a/drivers/clk/qcom/clk-regmap-divider.c b/drivers/clk/qcom/clk-regmap-divider.c
index 4f5395f0ab6d..672e82caf205 100644
--- a/drivers/clk/qcom/clk-regmap-divider.c
+++ b/drivers/clk/qcom/clk-regmap-divider.c
@@ -26,24 +26,16 @@ static int div_ro_determine_rate(struct clk_hw *hw,
val >>= divider->shift;
val &= BIT(divider->width) - 1;

- req->rate = divider_ro_round_rate(hw, req->rate,
- &req->best_parent_rate, NULL,
- divider->width,
- CLK_DIVIDER_ROUND_CLOSEST, val);
-
- return 0;
+ return divider_ro_determine_rate(hw, req, NULL, divider->width,
+ CLK_DIVIDER_ROUND_CLOSEST, val);
}

static int div_determine_rate(struct clk_hw *hw, struct clk_rate_request *req)
{
struct clk_regmap_div *divider = to_clk_regmap_div(hw);

- req->rate = divider_round_rate(hw, req->rate, &req->best_parent_rate,
- NULL,
- divider->width,
- CLK_DIVIDER_ROUND_CLOSEST);
-
- return 0;
+ return divider_determine_rate(hw, req, NULL, divider->width,
+ CLK_DIVIDER_ROUND_CLOSEST);
}

static int div_set_rate(struct clk_hw *hw, unsigned long rate,
diff --git a/drivers/clk/qcom/common.c b/drivers/clk/qcom/common.c
index 121591886774..eec369d2173b 100644
--- a/drivers/clk/qcom/common.c
+++ b/drivers/clk/qcom/common.c
@@ -454,7 +454,7 @@ int qcom_cc_probe_by_index(struct platform_device *pdev, int index,

base = devm_platform_ioremap_resource(pdev, index);
if (IS_ERR(base))
- return -ENOMEM;
+ return PTR_ERR(base);

regmap = devm_regmap_init_mmio(&pdev->dev, base, desc->config);
if (IS_ERR(regmap))
diff --git a/drivers/clk/qcom/dispcc-sdm845.c b/drivers/clk/qcom/dispcc-sdm845.c
index 2f9e9665d7e9..78e43f6d7502 100644
--- a/drivers/clk/qcom/dispcc-sdm845.c
+++ b/drivers/clk/qcom/dispcc-sdm845.c
@@ -280,7 +280,7 @@ static struct clk_rcg2 disp_cc_mdss_pclk0_clk_src = {
.name = "disp_cc_mdss_pclk0_clk_src",
.parent_data = disp_cc_parent_data_4,
.num_parents = ARRAY_SIZE(disp_cc_parent_data_4),
- .flags = CLK_SET_RATE_PARENT,
+ .flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
.ops = &clk_pixel_ops,
},
};
@@ -295,7 +295,7 @@ static struct clk_rcg2 disp_cc_mdss_pclk1_clk_src = {
.name = "disp_cc_mdss_pclk1_clk_src",
.parent_data = disp_cc_parent_data_4,
.num_parents = ARRAY_SIZE(disp_cc_parent_data_4),
- .flags = CLK_SET_RATE_PARENT,
+ .flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
.ops = &clk_pixel_ops,
},
};
diff --git a/drivers/clk/qcom/dispcc-sm7150.c b/drivers/clk/qcom/dispcc-sm7150.c
index ddc7230b8aea..923f0f38e804 100644
--- a/drivers/clk/qcom/dispcc-sm7150.c
+++ b/drivers/clk/qcom/dispcc-sm7150.c
@@ -370,7 +370,7 @@ static struct clk_rcg2 dispcc_mdss_pclk1_clk_src = {
.name = "dispcc_mdss_pclk1_clk_src",
.parent_data = dispcc_parent_data_4,
.num_parents = ARRAY_SIZE(dispcc_parent_data_4),
- .flags = CLK_SET_RATE_PARENT,
+ .flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
.ops = &clk_pixel_ops,
},
};
diff --git a/drivers/clk/qcom/gcc-glymur.c b/drivers/clk/qcom/gcc-glymur.c
index d938e7dc5b66..17e860307fa1 100644
--- a/drivers/clk/qcom/gcc-glymur.c
+++ b/drivers/clk/qcom/gcc-glymur.c
@@ -2317,7 +2317,7 @@ static struct clk_rcg2 gcc_sdcc2_apps_clk_src = {
.parent_data = gcc_parent_data_17,
.num_parents = ARRAY_SIZE(gcc_parent_data_17),
.flags = CLK_SET_RATE_PARENT,
- .ops = &clk_rcg2_floor_ops,
+ .ops = &clk_rcg2_shared_floor_ops,
},
};

@@ -2339,7 +2339,7 @@ static struct clk_rcg2 gcc_sdcc4_apps_clk_src = {
.parent_data = gcc_parent_data_3,
.num_parents = ARRAY_SIZE(gcc_parent_data_3),
.flags = CLK_SET_RATE_PARENT,
- .ops = &clk_rcg2_floor_ops,
+ .ops = &clk_rcg2_shared_floor_ops,
},
};

diff --git a/drivers/clk/qcom/gcc-ipq5018.c b/drivers/clk/qcom/gcc-ipq5018.c
index dcda2be8c1a5..64792cda0620 100644
--- a/drivers/clk/qcom/gcc-ipq5018.c
+++ b/drivers/clk/qcom/gcc-ipq5018.c
@@ -1340,6 +1340,7 @@ static struct clk_branch gcc_sleep_clk_src = {
.name = "gcc_sleep_clk_src",
.parent_data = gcc_sleep_clk_data,
.num_parents = ARRAY_SIZE(gcc_sleep_clk_data),
+ .flags = CLK_IS_CRITICAL,
.ops = &clk_branch2_ops,
},
},
diff --git a/drivers/clk/qcom/gcc-milos.c b/drivers/clk/qcom/gcc-milos.c
index c9d61b05bafa..81fa09ec55d7 100644
--- a/drivers/clk/qcom/gcc-milos.c
+++ b/drivers/clk/qcom/gcc-milos.c
@@ -917,7 +917,7 @@ static struct clk_rcg2 gcc_sdcc1_apps_clk_src = {
.name = "gcc_sdcc1_apps_clk_src",
.parent_data = gcc_parent_data_9,
.num_parents = ARRAY_SIZE(gcc_parent_data_9),
- .ops = &clk_rcg2_shared_ops,
+ .ops = &clk_rcg2_shared_floor_ops,
},
};

@@ -938,7 +938,7 @@ static struct clk_rcg2 gcc_sdcc1_ice_core_clk_src = {
.name = "gcc_sdcc1_ice_core_clk_src",
.parent_data = gcc_parent_data_10,
.num_parents = ARRAY_SIZE(gcc_parent_data_10),
- .ops = &clk_rcg2_shared_ops,
+ .ops = &clk_rcg2_shared_floor_ops,
},
};

@@ -962,7 +962,7 @@ static struct clk_rcg2 gcc_sdcc2_apps_clk_src = {
.name = "gcc_sdcc2_apps_clk_src",
.parent_data = gcc_parent_data_11,
.num_parents = ARRAY_SIZE(gcc_parent_data_11),
- .ops = &clk_rcg2_shared_ops,
+ .ops = &clk_rcg2_shared_floor_ops,
},
};

diff --git a/drivers/clk/qcom/gcc-msm8917.c b/drivers/clk/qcom/gcc-msm8917.c
index 0a1aa623cd49..9d1c5a9953e2 100644
--- a/drivers/clk/qcom/gcc-msm8917.c
+++ b/drivers/clk/qcom/gcc-msm8917.c
@@ -3409,7 +3409,6 @@ static struct gdsc cpp_gdsc = {
.pd = {
.name = "cpp_gdsc",
},
- .flags = ALWAYS_ON,
.pwrsts = PWRSTS_OFF_ON,
};

diff --git a/drivers/clk/qcom/gcc-msm8953.c b/drivers/clk/qcom/gcc-msm8953.c
index 8f29ecc74c50..8fe1d3e42144 100644
--- a/drivers/clk/qcom/gcc-msm8953.c
+++ b/drivers/clk/qcom/gcc-msm8953.c
@@ -3946,7 +3946,6 @@ static struct gdsc cpp_gdsc = {
.pd = {
.name = "cpp_gdsc",
},
- .flags = ALWAYS_ON,
.pwrsts = PWRSTS_OFF_ON,
};

diff --git a/drivers/clk/qcom/gcc-qdu1000.c b/drivers/clk/qcom/gcc-qdu1000.c
index dbe9e9437939..915bb9b4ff81 100644
--- a/drivers/clk/qcom/gcc-qdu1000.c
+++ b/drivers/clk/qcom/gcc-qdu1000.c
@@ -904,7 +904,7 @@ static struct clk_rcg2 gcc_sdcc5_apps_clk_src = {
.name = "gcc_sdcc5_apps_clk_src",
.parent_data = gcc_parent_data_8,
.num_parents = ARRAY_SIZE(gcc_parent_data_8),
- .ops = &clk_rcg2_floor_ops,
+ .ops = &clk_rcg2_shared_floor_ops,
},
};

@@ -923,7 +923,7 @@ static struct clk_rcg2 gcc_sdcc5_ice_core_clk_src = {
.name = "gcc_sdcc5_ice_core_clk_src",
.parent_data = gcc_parent_data_2,
.num_parents = ARRAY_SIZE(gcc_parent_data_2),
- .ops = &clk_rcg2_floor_ops,
+ .ops = &clk_rcg2_shared_floor_ops,
},
};

diff --git a/drivers/clk/qcom/gcc-sdx75.c b/drivers/clk/qcom/gcc-sdx75.c
index 453a6bf8e878..1f3cd58483a2 100644
--- a/drivers/clk/qcom/gcc-sdx75.c
+++ b/drivers/clk/qcom/gcc-sdx75.c
@@ -1033,7 +1033,7 @@ static struct clk_rcg2 gcc_sdcc1_apps_clk_src = {
.name = "gcc_sdcc1_apps_clk_src",
.parent_data = gcc_parent_data_17,
.num_parents = ARRAY_SIZE(gcc_parent_data_17),
- .ops = &clk_rcg2_floor_ops,
+ .ops = &clk_rcg2_shared_floor_ops,
},
};

@@ -1057,7 +1057,7 @@ static struct clk_rcg2 gcc_sdcc2_apps_clk_src = {
.name = "gcc_sdcc2_apps_clk_src",
.parent_data = gcc_parent_data_18,
.num_parents = ARRAY_SIZE(gcc_parent_data_18),
- .ops = &clk_rcg2_floor_ops,
+ .ops = &clk_rcg2_shared_floor_ops,
},
};

diff --git a/drivers/clk/qcom/gcc-sm4450.c b/drivers/clk/qcom/gcc-sm4450.c
index e2d9e4691c5b..023d840e9f4e 100644
--- a/drivers/clk/qcom/gcc-sm4450.c
+++ b/drivers/clk/qcom/gcc-sm4450.c
@@ -769,7 +769,7 @@ static struct clk_rcg2 gcc_sdcc1_apps_clk_src = {
.parent_data = gcc_parent_data_4,
.num_parents = ARRAY_SIZE(gcc_parent_data_4),
.flags = CLK_SET_RATE_PARENT,
- .ops = &clk_rcg2_floor_ops,
+ .ops = &clk_rcg2_shared_floor_ops,
},
};

@@ -791,7 +791,7 @@ static struct clk_rcg2 gcc_sdcc1_ice_core_clk_src = {
.parent_data = gcc_parent_data_4,
.num_parents = ARRAY_SIZE(gcc_parent_data_4),
.flags = CLK_SET_RATE_PARENT,
- .ops = &clk_rcg2_floor_ops,
+ .ops = &clk_rcg2_shared_floor_ops,
},
};

@@ -815,7 +815,7 @@ static struct clk_rcg2 gcc_sdcc2_apps_clk_src = {
.parent_data = gcc_parent_data_6,
.num_parents = ARRAY_SIZE(gcc_parent_data_6),
.flags = CLK_SET_RATE_PARENT,
- .ops = &clk_rcg2_floor_ops,
+ .ops = &clk_rcg2_shared_floor_ops,
},
};

diff --git a/drivers/clk/qcom/gcc-sm8450.c b/drivers/clk/qcom/gcc-sm8450.c
index 65d7d52bce03..b18bb34889ab 100644
--- a/drivers/clk/qcom/gcc-sm8450.c
+++ b/drivers/clk/qcom/gcc-sm8450.c
@@ -1034,7 +1034,7 @@ static struct clk_rcg2 gcc_sdcc2_apps_clk_src = {
.parent_data = gcc_parent_data_7,
.num_parents = ARRAY_SIZE(gcc_parent_data_7),
.flags = CLK_SET_RATE_PARENT,
- .ops = &clk_rcg2_floor_ops,
+ .ops = &clk_rcg2_shared_floor_ops,
},
};

@@ -1057,7 +1057,7 @@ static struct clk_rcg2 gcc_sdcc4_apps_clk_src = {
.parent_data = gcc_parent_data_0,
.num_parents = ARRAY_SIZE(gcc_parent_data_0),
.flags = CLK_SET_RATE_PARENT,
- .ops = &clk_rcg2_floor_ops,
+ .ops = &clk_rcg2_shared_floor_ops,
},
};

diff --git a/drivers/clk/qcom/gcc-sm8550.c b/drivers/clk/qcom/gcc-sm8550.c
index 862a9bf73bcb..36a5b7de5b55 100644
--- a/drivers/clk/qcom/gcc-sm8550.c
+++ b/drivers/clk/qcom/gcc-sm8550.c
@@ -1025,7 +1025,7 @@ static struct clk_rcg2 gcc_sdcc2_apps_clk_src = {
.parent_data = gcc_parent_data_9,
.num_parents = ARRAY_SIZE(gcc_parent_data_9),
.flags = CLK_SET_RATE_PARENT,
- .ops = &clk_rcg2_shared_ops,
+ .ops = &clk_rcg2_shared_floor_ops,
},
};

@@ -1048,7 +1048,7 @@ static struct clk_rcg2 gcc_sdcc4_apps_clk_src = {
.parent_data = gcc_parent_data_0,
.num_parents = ARRAY_SIZE(gcc_parent_data_0),
.flags = CLK_SET_RATE_PARENT,
- .ops = &clk_rcg2_shared_ops,
+ .ops = &clk_rcg2_shared_floor_ops,
},
};

diff --git a/drivers/clk/qcom/gcc-sm8650.c b/drivers/clk/qcom/gcc-sm8650.c
index 24f98062b9dd..2dd6444ce036 100644
--- a/drivers/clk/qcom/gcc-sm8650.c
+++ b/drivers/clk/qcom/gcc-sm8650.c
@@ -1257,7 +1257,7 @@ static struct clk_rcg2 gcc_sdcc2_apps_clk_src = {
.parent_data = gcc_parent_data_11,
.num_parents = ARRAY_SIZE(gcc_parent_data_11),
.flags = CLK_SET_RATE_PARENT,
- .ops = &clk_rcg2_shared_ops,
+ .ops = &clk_rcg2_shared_floor_ops,
},
};

@@ -1279,7 +1279,7 @@ static struct clk_rcg2 gcc_sdcc4_apps_clk_src = {
.parent_data = gcc_parent_data_0,
.num_parents = ARRAY_SIZE(gcc_parent_data_0),
.flags = CLK_SET_RATE_PARENT,
- .ops = &clk_rcg2_shared_ops,
+ .ops = &clk_rcg2_shared_floor_ops,
},
};

diff --git a/drivers/clk/qcom/gcc-sm8750.c b/drivers/clk/qcom/gcc-sm8750.c
index def86b71a3da..db81569dd4b1 100644
--- a/drivers/clk/qcom/gcc-sm8750.c
+++ b/drivers/clk/qcom/gcc-sm8750.c
@@ -1030,7 +1030,7 @@ static struct clk_rcg2 gcc_sdcc2_apps_clk_src = {
.parent_data = gcc_parent_data_8,
.num_parents = ARRAY_SIZE(gcc_parent_data_8),
.flags = CLK_SET_RATE_PARENT,
- .ops = &clk_rcg2_floor_ops,
+ .ops = &clk_rcg2_shared_floor_ops,
},
};

@@ -1052,7 +1052,7 @@ static struct clk_rcg2 gcc_sdcc4_apps_clk_src = {
.parent_data = gcc_parent_data_0,
.num_parents = ARRAY_SIZE(gcc_parent_data_0),
.flags = CLK_SET_RATE_PARENT,
- .ops = &clk_rcg2_floor_ops,
+ .ops = &clk_rcg2_shared_floor_ops,
},
};

diff --git a/drivers/clk/qcom/gcc-x1e80100.c b/drivers/clk/qcom/gcc-x1e80100.c
index 301fc9fc32d8..ef8d2df188d3 100644
--- a/drivers/clk/qcom/gcc-x1e80100.c
+++ b/drivers/clk/qcom/gcc-x1e80100.c
@@ -1123,7 +1123,7 @@ static struct clk_rcg2 gcc_sdcc2_apps_clk_src = {
.parent_data = gcc_parent_data_9,
.num_parents = ARRAY_SIZE(gcc_parent_data_9),
.flags = CLK_SET_RATE_PARENT,
- .ops = &clk_rcg2_floor_ops,
+ .ops = &clk_rcg2_shared_floor_ops,
},
};

@@ -1145,7 +1145,7 @@ static struct clk_rcg2 gcc_sdcc4_apps_clk_src = {
.parent_data = gcc_parent_data_0,
.num_parents = ARRAY_SIZE(gcc_parent_data_0),
.flags = CLK_SET_RATE_PARENT,
- .ops = &clk_rcg2_floor_ops,
+ .ops = &clk_rcg2_shared_floor_ops,
},
};

diff --git a/drivers/clk/rockchip/clk.c b/drivers/clk/rockchip/clk.c
index 19caf26c991b..2d30b1e24f01 100644
--- a/drivers/clk/rockchip/clk.c
+++ b/drivers/clk/rockchip/clk.c
@@ -693,7 +693,7 @@ void rockchip_clk_register_late_branches(struct device *dev,
break;
}

- if (!pdev)
+ if (IS_ERR_OR_NULL(pdev))
dev_err(dev, "failed to register device for clock %s\n", list->name);
}
}
diff --git a/drivers/clk/sophgo/clk-sg2042-clkgen.c b/drivers/clk/sophgo/clk-sg2042-clkgen.c
index 683661b71787..9725ac4e050a 100644
--- a/drivers/clk/sophgo/clk-sg2042-clkgen.c
+++ b/drivers/clk/sophgo/clk-sg2042-clkgen.c
@@ -180,7 +180,6 @@ static int sg2042_clk_divider_determine_rate(struct clk_hw *hw,
struct clk_rate_request *req)
{
struct sg2042_divider_clock *divider = to_sg2042_clk_divider(hw);
- unsigned long ret_rate;
u32 bestdiv;

/* if read only, just return current value */
@@ -191,17 +190,13 @@ static int sg2042_clk_divider_determine_rate(struct clk_hw *hw,
bestdiv = readl(divider->reg) >> divider->shift;
bestdiv &= clk_div_mask(divider->width);
}
- ret_rate = DIV_ROUND_UP_ULL((u64)req->best_parent_rate, bestdiv);
- } else {
- ret_rate = divider_round_rate(hw, req->rate, &req->best_parent_rate, NULL,
- divider->width, divider->div_flags);
- }
+ req->rate = DIV_ROUND_UP_ULL((u64)req->best_parent_rate, bestdiv);

- pr_debug("--> %s: divider_round_rate: val = %ld\n",
- clk_hw_get_name(hw), ret_rate);
- req->rate = ret_rate;
+ return 0;
+ }

- return 0;
+ return divider_determine_rate(hw, req, NULL, divider->width,
+ divider->div_flags);
}

static int sg2042_clk_divider_set_rate(struct clk_hw *hw,
diff --git a/drivers/clk/spacemit/Makefile b/drivers/clk/spacemit/Makefile
index 5ec6da61db98..ad2bf315109b 100644
--- a/drivers/clk/spacemit/Makefile
+++ b/drivers/clk/spacemit/Makefile
@@ -1,5 +1,10 @@
# SPDX-License-Identifier: GPL-2.0

-obj-$(CONFIG_SPACEMIT_K1_CCU) = spacemit-ccu-k1.o
-spacemit-ccu-k1-y = ccu_pll.o ccu_mix.o ccu_ddn.o
+obj-$(CONFIG_SPACEMIT_CCU) += spacemit-ccu.o
+spacemit-ccu-y += ccu_common.o
+spacemit-ccu-y += ccu_pll.o
+spacemit-ccu-y += ccu_mix.o
+spacemit-ccu-y += ccu_ddn.o
+
+obj-$(CONFIG_SPACEMIT_K1_CCU) += spacemit-ccu-k1.o
spacemit-ccu-k1-y += ccu-k1.o
diff --git a/drivers/clk/spacemit/ccu-k1.c b/drivers/clk/spacemit/ccu-k1.c
index 4761bc1e3b6e..01d9485b615d 100644
--- a/drivers/clk/spacemit/ccu-k1.c
+++ b/drivers/clk/spacemit/ccu-k1.c
@@ -1204,6 +1204,7 @@ static struct platform_driver k1_ccu_driver = {
};
module_platform_driver(k1_ccu_driver);

+MODULE_IMPORT_NS("CLK_SPACEMIT");
MODULE_DESCRIPTION("SpacemiT K1 CCU driver");
MODULE_AUTHOR("Haylen Chu <heylenay@xxxxxxx>");
MODULE_LICENSE("GPL");
diff --git a/drivers/clk/spacemit/ccu_common.c b/drivers/clk/spacemit/ccu_common.c
new file mode 100644
index 000000000000..4412c4104dab
--- /dev/null
+++ b/drivers/clk/spacemit/ccu_common.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("SpacemiT CCU common clock driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/clk/spacemit/ccu_ddn.c b/drivers/clk/spacemit/ccu_ddn.c
index 5b16e273bee5..b5540e0781ff 100644
--- a/drivers/clk/spacemit/ccu_ddn.c
+++ b/drivers/clk/spacemit/ccu_ddn.c
@@ -84,3 +84,4 @@ const struct clk_ops spacemit_ccu_ddn_ops = {
.determine_rate = ccu_ddn_determine_rate,
.set_rate = ccu_ddn_set_rate,
};
+EXPORT_SYMBOL_NS_GPL(spacemit_ccu_ddn_ops, "CLK_SPACEMIT");
diff --git a/drivers/clk/spacemit/ccu_mix.c b/drivers/clk/spacemit/ccu_mix.c
index 7b7990875372..67f8b12b4f5b 100644
--- a/drivers/clk/spacemit/ccu_mix.c
+++ b/drivers/clk/spacemit/ccu_mix.c
@@ -198,24 +198,28 @@ const struct clk_ops spacemit_ccu_gate_ops = {
.enable = ccu_gate_enable,
.is_enabled = ccu_gate_is_enabled,
};
+EXPORT_SYMBOL_NS_GPL(spacemit_ccu_gate_ops, "CLK_SPACEMIT");

const struct clk_ops spacemit_ccu_factor_ops = {
.determine_rate = ccu_factor_determine_rate,
.recalc_rate = ccu_factor_recalc_rate,
.set_rate = ccu_factor_set_rate,
};
+EXPORT_SYMBOL_NS_GPL(spacemit_ccu_factor_ops, "CLK_SPACEMIT");

const struct clk_ops spacemit_ccu_mux_ops = {
.determine_rate = ccu_mix_determine_rate,
.get_parent = ccu_mux_get_parent,
.set_parent = ccu_mux_set_parent,
};
+EXPORT_SYMBOL_NS_GPL(spacemit_ccu_mux_ops, "CLK_SPACEMIT");

const struct clk_ops spacemit_ccu_div_ops = {
.determine_rate = ccu_mix_determine_rate,
.recalc_rate = ccu_div_recalc_rate,
.set_rate = ccu_mix_set_rate,
};
+EXPORT_SYMBOL_NS_GPL(spacemit_ccu_div_ops, "CLK_SPACEMIT");

const struct clk_ops spacemit_ccu_factor_gate_ops = {
.disable = ccu_gate_disable,
@@ -226,6 +230,7 @@ const struct clk_ops spacemit_ccu_factor_gate_ops = {
.recalc_rate = ccu_factor_recalc_rate,
.set_rate = ccu_factor_set_rate,
};
+EXPORT_SYMBOL_NS_GPL(spacemit_ccu_factor_gate_ops, "CLK_SPACEMIT");

const struct clk_ops spacemit_ccu_mux_gate_ops = {
.disable = ccu_gate_disable,
@@ -236,6 +241,7 @@ const struct clk_ops spacemit_ccu_mux_gate_ops = {
.get_parent = ccu_mux_get_parent,
.set_parent = ccu_mux_set_parent,
};
+EXPORT_SYMBOL_NS_GPL(spacemit_ccu_mux_gate_ops, "CLK_SPACEMIT");

const struct clk_ops spacemit_ccu_div_gate_ops = {
.disable = ccu_gate_disable,
@@ -246,6 +252,7 @@ const struct clk_ops spacemit_ccu_div_gate_ops = {
.recalc_rate = ccu_div_recalc_rate,
.set_rate = ccu_mix_set_rate,
};
+EXPORT_SYMBOL_NS_GPL(spacemit_ccu_div_gate_ops, "CLK_SPACEMIT");

const struct clk_ops spacemit_ccu_mux_div_gate_ops = {
.disable = ccu_gate_disable,
@@ -259,6 +266,7 @@ const struct clk_ops spacemit_ccu_mux_div_gate_ops = {
.recalc_rate = ccu_div_recalc_rate,
.set_rate = ccu_mix_set_rate,
};
+EXPORT_SYMBOL_NS_GPL(spacemit_ccu_mux_div_gate_ops, "CLK_SPACEMIT");

const struct clk_ops spacemit_ccu_mux_div_ops = {
.get_parent = ccu_mux_get_parent,
@@ -268,3 +276,4 @@ const struct clk_ops spacemit_ccu_mux_div_ops = {
.recalc_rate = ccu_div_recalc_rate,
.set_rate = ccu_mix_set_rate,
};
+EXPORT_SYMBOL_NS_GPL(spacemit_ccu_mux_div_ops, "CLK_SPACEMIT");
diff --git a/drivers/clk/spacemit/ccu_pll.c b/drivers/clk/spacemit/ccu_pll.c
index d92f0dae65a4..76d0244873d8 100644
--- a/drivers/clk/spacemit/ccu_pll.c
+++ b/drivers/clk/spacemit/ccu_pll.c
@@ -157,3 +157,4 @@ const struct clk_ops spacemit_ccu_pll_ops = {
.determine_rate = ccu_pll_determine_rate,
.is_enabled = ccu_pll_is_enabled,
};
+EXPORT_SYMBOL_NS_GPL(spacemit_ccu_pll_ops, "CLK_SPACEMIT");
diff --git a/drivers/clk/sprd/div.c b/drivers/clk/sprd/div.c
index 013423881968..cd57163a7204 100644
--- a/drivers/clk/sprd/div.c
+++ b/drivers/clk/sprd/div.c
@@ -14,11 +14,7 @@ static int sprd_div_determine_rate(struct clk_hw *hw,
{
struct sprd_div *cd = hw_to_sprd_div(hw);

- req->rate = divider_round_rate(&cd->common.hw, req->rate,
- &req->best_parent_rate,
- NULL, cd->div.width, 0);
-
- return 0;
+ return divider_determine_rate(&cd->common.hw, req, NULL, cd->div.width, 0);
}

unsigned long sprd_div_helper_recalc_rate(struct sprd_clk_common *common,
diff --git a/drivers/clk/stm32/clk-stm32-core.c b/drivers/clk/stm32/clk-stm32-core.c
index 72825b9c36a4..e921c25a929c 100644
--- a/drivers/clk/stm32/clk-stm32-core.c
+++ b/drivers/clk/stm32/clk-stm32-core.c
@@ -369,22 +369,14 @@ static int clk_stm32_divider_determine_rate(struct clk_hw *hw,
val = readl(div->base + divider->offset) >> divider->shift;
val &= clk_div_mask(divider->width);

- req->rate = divider_ro_round_rate(hw, req->rate,
- &req->best_parent_rate,
- divider->table,
- divider->width,
- divider->flags, val);
-
- return 0;
+ return divider_ro_determine_rate(hw, req,
+ divider->table,
+ divider->width,
+ divider->flags, val);
}

- req->rate = divider_round_rate_parent(hw, clk_hw_get_parent(hw),
- req->rate,
- &req->best_parent_rate,
- divider->table,
- divider->width, divider->flags);
-
- return 0;
+ return divider_determine_rate(hw, req, divider->table, divider->width,
+ divider->flags);
}

static unsigned long clk_stm32_divider_recalc_rate(struct clk_hw *hw,
@@ -441,7 +433,6 @@ static int clk_stm32_composite_determine_rate(struct clk_hw *hw,
{
struct clk_stm32_composite *composite = to_clk_stm32_composite(hw);
const struct stm32_div_cfg *divider;
- long rate;

if (composite->div_id == NO_STM32_DIV)
return 0;
@@ -455,24 +446,13 @@ static int clk_stm32_composite_determine_rate(struct clk_hw *hw,
val = readl(composite->base + divider->offset) >> divider->shift;
val &= clk_div_mask(divider->width);

- rate = divider_ro_round_rate(hw, req->rate, &req->best_parent_rate,
- divider->table, divider->width, divider->flags,
- val);
- if (rate < 0)
- return rate;
-
- req->rate = rate;
- return 0;
+ return divider_ro_determine_rate(hw, req, divider->table,
+ divider->width, divider->flags,
+ val);
}

- rate = divider_round_rate_parent(hw, clk_hw_get_parent(hw),
- req->rate, &req->best_parent_rate,
- divider->table, divider->width, divider->flags);
- if (rate < 0)
- return rate;
-
- req->rate = rate;
- return 0;
+ return divider_determine_rate(hw, req, divider->table, divider->width,
+ divider->flags);
}

static u8 clk_stm32_composite_get_parent(struct clk_hw *hw)
diff --git a/drivers/clk/thead/clk-th1520-ap.c b/drivers/clk/thead/clk-th1520-ap.c
index 71ad03a998e8..d870f0c665f8 100644
--- a/drivers/clk/thead/clk-th1520-ap.c
+++ b/drivers/clk/thead/clk-th1520-ap.c
@@ -8,11 +8,14 @@
#include <dt-bindings/clock/thead,th1520-clk-ap.h>
#include <linux/bitfield.h>
#include <linux/clk-provider.h>
+#include <linux/delay.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/regmap.h>

+#define TH1520_PLL_STS 0x80
+
#define TH1520_PLL_POSTDIV2 GENMASK(26, 24)
#define TH1520_PLL_POSTDIV1 GENMASK(22, 20)
#define TH1520_PLL_FBDIV GENMASK(19, 8)
@@ -23,6 +26,13 @@
#define TH1520_PLL_FRAC GENMASK(23, 0)
#define TH1520_PLL_FRAC_BITS 24

+/*
+ * All PLLs in TH1520 take 21250ns at maximum to lock, let's take its double
+ * for safety.
+ */
+#define TH1520_PLL_LOCK_TIMEOUT_US 44
+#define TH1520_PLL_STABLE_DELAY_US 30
+
struct ccu_internal {
u8 shift;
u8 width;
@@ -64,6 +74,7 @@ struct ccu_div {

struct ccu_pll {
struct ccu_common common;
+ u32 lock_sts_mask;
};

#define TH_CCU_ARG(_shift, _width) \
@@ -299,9 +310,21 @@ static void ccu_pll_disable(struct clk_hw *hw)
static int ccu_pll_enable(struct clk_hw *hw)
{
struct ccu_pll *pll = hw_to_ccu_pll(hw);
+ u32 reg;
+ int ret;

- return regmap_clear_bits(pll->common.map, pll->common.cfg1,
- TH1520_PLL_VCO_RST);
+ regmap_clear_bits(pll->common.map, pll->common.cfg1,
+ TH1520_PLL_VCO_RST);
+
+ ret = regmap_read_poll_timeout_atomic(pll->common.map, TH1520_PLL_STS,
+ reg, reg & pll->lock_sts_mask,
+ 5, TH1520_PLL_LOCK_TIMEOUT_US);
+ if (ret)
+ return ret;
+
+ udelay(TH1520_PLL_STABLE_DELAY_US);
+
+ return 0;
}

static int ccu_pll_is_enabled(struct clk_hw *hw)
@@ -389,6 +412,7 @@ static struct ccu_pll cpu_pll0_clk = {
&clk_pll_ops,
CLK_IS_CRITICAL),
},
+ .lock_sts_mask = BIT(1),
};

static struct ccu_pll cpu_pll1_clk = {
@@ -401,6 +425,7 @@ static struct ccu_pll cpu_pll1_clk = {
&clk_pll_ops,
CLK_IS_CRITICAL),
},
+ .lock_sts_mask = BIT(4),
};

static struct ccu_pll gmac_pll_clk = {
@@ -413,6 +438,7 @@ static struct ccu_pll gmac_pll_clk = {
&clk_pll_ops,
CLK_IS_CRITICAL),
},
+ .lock_sts_mask = BIT(3),
};

static const struct clk_hw *gmac_pll_clk_parent[] = {
@@ -433,6 +459,7 @@ static struct ccu_pll video_pll_clk = {
&clk_pll_ops,
CLK_IS_CRITICAL),
},
+ .lock_sts_mask = BIT(7),
};

static const struct clk_hw *video_pll_clk_parent[] = {
@@ -453,6 +480,7 @@ static struct ccu_pll dpu0_pll_clk = {
&clk_pll_ops,
0),
},
+ .lock_sts_mask = BIT(8),
};

static const struct clk_hw *dpu0_pll_clk_parent[] = {
@@ -469,6 +497,7 @@ static struct ccu_pll dpu1_pll_clk = {
&clk_pll_ops,
0),
},
+ .lock_sts_mask = BIT(9),
};

static const struct clk_hw *dpu1_pll_clk_parent[] = {
@@ -485,6 +514,7 @@ static struct ccu_pll tee_pll_clk = {
&clk_pll_ops,
CLK_IS_CRITICAL),
},
+ .lock_sts_mask = BIT(10),
};

static const struct clk_parent_data c910_i0_parents[] = {
diff --git a/drivers/clk/x86/clk-cgu.c b/drivers/clk/x86/clk-cgu.c
index d099667355f8..92ee05d75af2 100644
--- a/drivers/clk/x86/clk-cgu.c
+++ b/drivers/clk/x86/clk-cgu.c
@@ -137,10 +137,8 @@ static int lgm_clk_divider_determine_rate(struct clk_hw *hw,
{
struct lgm_clk_divider *divider = to_lgm_clk_divider(hw);

- req->rate = divider_round_rate(hw, req->rate, &req->best_parent_rate, divider->table,
- divider->width, divider->flags);
-
- return 0;
+ return divider_determine_rate(hw, req, divider->table, divider->width,
+ divider->flags);
}

static int
diff --git a/drivers/clk/zynqmp/divider.c b/drivers/clk/zynqmp/divider.c
index c824eeacd8eb..984e577ea671 100644
--- a/drivers/clk/zynqmp/divider.c
+++ b/drivers/clk/zynqmp/divider.c
@@ -111,10 +111,9 @@ static unsigned long zynqmp_clk_divider_recalc_rate(struct clk_hw *hw,
}

/**
- * zynqmp_clk_divider_round_rate() - Round rate of divider clock
+ * zynqmp_clk_divider_determine_rate() - Determine rate of divider clock
* @hw: handle between common and hardware-specific interfaces
- * @rate: rate of clock to be set
- * @prate: rate of parent clock
+ * @req: rate of clock to be set
*
* Return: 0 on success else error+reason
*/
@@ -151,8 +150,9 @@ static int zynqmp_clk_divider_determine_rate(struct clk_hw *hw,

width = fls(divider->max_div);

- req->rate = divider_round_rate(hw, req->rate, &req->best_parent_rate,
- NULL, width, divider->flags);
+ ret = divider_determine_rate(hw, req, NULL, width, divider->flags);
+ if (ret != 0)
+ return ret;

if (divider->is_frac && (clk_hw_get_flags(hw) & CLK_SET_RATE_PARENT) &&
(req->rate % req->best_parent_rate))
diff --git a/drivers/clk/zynqmp/pll.c b/drivers/clk/zynqmp/pll.c
index 630a3936c97c..6bc2c3934f56 100644
--- a/drivers/clk/zynqmp/pll.c
+++ b/drivers/clk/zynqmp/pll.c
@@ -91,10 +91,9 @@ static inline void zynqmp_pll_set_mode(struct clk_hw *hw, bool on)
}

/**
- * zynqmp_pll_round_rate() - Round a clock frequency
+ * zynqmp_pll_determine_rate() - Round a clock frequency
* @hw: Handle between common and hardware-specific interfaces
- * @rate: Desired clock frequency
- * @prate: Clock frequency of parent clock
+ * @req: Desired clock frequency
*
* Return: Frequency closest to @rate the hardware can generate
*/
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 492a10f1bdbf..38333f7da40d 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1152,7 +1152,7 @@ static void hybrid_init_cpu_capacity_scaling(bool refresh)
* the capacity of SMT threads is not deterministic even approximately,
* do not do that when SMT is in use.
*/
- if (hwp_is_hybrid && !sched_smt_active() && arch_enable_hybrid_capacity_scale()) {
+ if (hwp_is_hybrid && !cpu_smt_possible() && arch_enable_hybrid_capacity_scale()) {
hybrid_refresh_cpu_capacity_scaling();
/*
* Disabling ITMT causes sched domains to be rebuilt to disable asym
diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c
index d2a110079f5f..c450cf9c881d 100644
--- a/drivers/cpufreq/scmi-cpufreq.c
+++ b/drivers/cpufreq/scmi-cpufreq.c
@@ -101,6 +101,7 @@ static int scmi_cpu_domain_id(struct device *cpu_dev)
return -EINVAL;
}

+ of_node_put(domain_id.np);
return domain_id.args[0];
}

diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 56132e843c99..8950796a493d 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -357,6 +357,16 @@ noinstr int cpuidle_enter_state(struct cpuidle_device *dev,
int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
bool *stop_tick)
{
+ /*
+ * If there is only a single idle state (or none), there is nothing
+ * meaningful for the governor to choose. Skip the governor and
+ * always use state 0 with the tick running.
+ */
+ if (drv->state_count <= 1) {
+ *stop_tick = false;
+ return 0;
+ }
+
return cpuidle_curr_governor->select(drv, dev, stop_tick);
}

diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 64d6f7a1c776..ca863ba03d45 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -239,7 +239,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,

/* Find the shortest expected idle interval. */
predicted_ns = get_typical_interval(data) * NSEC_PER_USEC;
- if (predicted_ns > RESIDENCY_THRESHOLD_NS) {
+ if (predicted_ns > RESIDENCY_THRESHOLD_NS || tick_nohz_tick_stopped()) {
unsigned int timer_us;

/* Determine the time till the closest timer. */
@@ -259,6 +259,16 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
RESOLUTION * DECAY * NSEC_PER_USEC);
/* Use the lowest expected idle interval to pick the idle state. */
predicted_ns = min((u64)timer_us * NSEC_PER_USEC, predicted_ns);
+ /*
+ * If the tick is already stopped, the cost of possible short
+ * idle duration misprediction is much higher, because the CPU
+ * may be stuck in a shallow idle state for a long time as a
+ * result of it. In that case, say we might mispredict and use
+ * the known time till the closest timer event for the idle
+ * state selection.
+ */
+ if (tick_nohz_tick_stopped() && predicted_ns < TICK_NSEC)
+ predicted_ns = data->next_timer_ns;
} else {
/*
* Because the next timer event is not going to be determined
@@ -284,16 +294,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
return 0;
}

- /*
- * If the tick is already stopped, the cost of possible short idle
- * duration misprediction is much higher, because the CPU may be stuck
- * in a shallow idle state for a long time as a result of it. In that
- * case, say we might mispredict and use the known time till the closest
- * timer event for the idle state selection.
- */
- if (tick_nohz_tick_stopped() && predicted_ns < TICK_NSEC)
- predicted_ns = data->next_timer_ns;
-
/*
* Find the idle state with the lowest power while satisfying
* our constraints.
diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c
index 107ccb2ade42..c6117c23eb25 100644
--- a/drivers/crypto/caam/caamalg_qi2.c
+++ b/drivers/crypto/caam/caamalg_qi2.c
@@ -4814,7 +4814,8 @@ static void dpaa2_dpseci_free(struct dpaa2_caam_priv *priv)
{
struct device *dev = priv->dev;
struct fsl_mc_device *ls_dev = to_fsl_mc_device(dev);
- int err;
+ struct dpaa2_caam_priv_per_cpu *ppriv;
+ int i, err;

if (DPSECI_VER(priv->major_ver, priv->minor_ver) > DPSECI_VER(5, 3)) {
err = dpseci_reset(priv->mc_io, 0, ls_dev->mc_handle);
@@ -4822,6 +4823,12 @@ static void dpaa2_dpseci_free(struct dpaa2_caam_priv *priv)
dev_err(dev, "dpseci_reset() failed\n");
}

+ for_each_cpu(i, priv->clean_mask) {
+ ppriv = per_cpu_ptr(priv->ppriv, i);
+ free_netdev(ppriv->net_dev);
+ }
+ free_cpumask_var(priv->clean_mask);
+
dpaa2_dpseci_congestion_free(priv);
dpseci_close(priv->mc_io, 0, ls_dev->mc_handle);
}
@@ -5007,16 +5014,15 @@ static int __cold dpaa2_dpseci_setup(struct fsl_mc_device *ls_dev)
struct device *dev = &ls_dev->dev;
struct dpaa2_caam_priv *priv;
struct dpaa2_caam_priv_per_cpu *ppriv;
- cpumask_var_t clean_mask;
int err, cpu;
u8 i;

err = -ENOMEM;
- if (!zalloc_cpumask_var(&clean_mask, GFP_KERNEL))
- goto err_cpumask;
-
priv = dev_get_drvdata(dev);

+ if (!zalloc_cpumask_var(&priv->clean_mask, GFP_KERNEL))
+ goto err_cpumask;
+
priv->dev = dev;
priv->dpsec_id = ls_dev->obj_desc.id;

@@ -5118,7 +5124,7 @@ static int __cold dpaa2_dpseci_setup(struct fsl_mc_device *ls_dev)
err = -ENOMEM;
goto err_alloc_netdev;
}
- cpumask_set_cpu(cpu, clean_mask);
+ cpumask_set_cpu(cpu, priv->clean_mask);
ppriv->net_dev->dev = *dev;

netif_napi_add_tx_weight(ppriv->net_dev, &ppriv->napi,
@@ -5126,18 +5132,16 @@ static int __cold dpaa2_dpseci_setup(struct fsl_mc_device *ls_dev)
DPAA2_CAAM_NAPI_WEIGHT);
}

- err = 0;
- goto free_cpumask;
+ return 0;

err_alloc_netdev:
- free_dpaa2_pcpu_netdev(priv, clean_mask);
+ free_dpaa2_pcpu_netdev(priv, priv->clean_mask);
err_get_rx_queue:
dpaa2_dpseci_congestion_free(priv);
err_get_vers:
dpseci_close(priv->mc_io, 0, ls_dev->mc_handle);
err_open:
-free_cpumask:
- free_cpumask_var(clean_mask);
+ free_cpumask_var(priv->clean_mask);
err_cpumask:
return err;
}
@@ -5182,7 +5186,6 @@ static int __cold dpaa2_dpseci_disable(struct dpaa2_caam_priv *priv)
ppriv = per_cpu_ptr(priv->ppriv, i);
napi_disable(&ppriv->napi);
netif_napi_del(&ppriv->napi);
- free_netdev(ppriv->net_dev);
}

return 0;
diff --git a/drivers/crypto/caam/caamalg_qi2.h b/drivers/crypto/caam/caamalg_qi2.h
index 61d1219a202f..8e65b4b28c7b 100644
--- a/drivers/crypto/caam/caamalg_qi2.h
+++ b/drivers/crypto/caam/caamalg_qi2.h
@@ -42,6 +42,7 @@
* @mc_io: pointer to MC portal's I/O object
* @domain: IOMMU domain
* @ppriv: per CPU pointers to privata data
+ * @clean_mask: CPU mask of CPUs that have allocated netdevs
*/
struct dpaa2_caam_priv {
int dpsec_id;
@@ -65,6 +66,7 @@ struct dpaa2_caam_priv {

struct dpaa2_caam_priv_per_cpu __percpu *ppriv;
struct dentry *dfs_root;
+ cpumask_var_t clean_mask;
};

/**
diff --git a/drivers/crypto/cavium/cpt/cptvf_main.c b/drivers/crypto/cavium/cpt/cptvf_main.c
index c246920e6f54..bccd680c7f7e 100644
--- a/drivers/crypto/cavium/cpt/cptvf_main.c
+++ b/drivers/crypto/cavium/cpt/cptvf_main.c
@@ -180,7 +180,8 @@ static void free_command_queues(struct cpt_vf *cptvf,

hlist_for_each_entry_safe(chunk, node, &cqinfo->queue[i].chead,
nextchunk) {
- dma_free_coherent(&pdev->dev, chunk->size,
+ dma_free_coherent(&pdev->dev,
+ chunk->size + CPT_NEXT_CHUNK_PTR_SIZE,
chunk->head,
chunk->dma_addr);
chunk->head = NULL;
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
index d78865d9d5f0..d0412e584762 100644
--- a/drivers/crypto/ccp/ccp-ops.c
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -642,7 +642,7 @@ ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
struct ccp_data dst;
struct ccp_data aad;
struct ccp_op op;
- } *wa __cleanup(kfree) = kzalloc(sizeof *wa, GFP_KERNEL);
+ } *wa __free(kfree) = kzalloc(sizeof(*wa), GFP_KERNEL);
unsigned int dm_offset;
unsigned int authsize;
unsigned int jobid;
diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c
index 9e21da0e298a..5c7f7e02a7d8 100644
--- a/drivers/crypto/ccp/psp-dev.c
+++ b/drivers/crypto/ccp/psp-dev.c
@@ -351,6 +351,17 @@ struct psp_device *psp_get_master_device(void)
return sp ? sp->psp_data : NULL;
}

+int psp_restore(struct sp_device *sp)
+{
+ struct psp_device *psp = sp->psp_data;
+ int ret = 0;
+
+ if (psp->tee_data)
+ ret = tee_restore(psp);
+
+ return ret;
+}
+
void psp_pci_init(void)
{
psp_master = psp_get_master_device();
diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
index 0d13d47c164b..5fdba0fe4acc 100644
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -119,13 +119,6 @@ static size_t sev_es_tmr_size = SEV_TMR_SIZE;
#define NV_LENGTH (32 * 1024)
static void *sev_init_ex_buffer;

-/*
- * SEV_DATA_RANGE_LIST:
- * Array containing range of pages that firmware transitions to HV-fixed
- * page state.
- */
-static struct sev_data_range_list *snp_range_list;
-
static void __sev_firmware_shutdown(struct sev_device *sev, bool panic);

static int snp_shutdown_on_panic(struct notifier_block *nb,
@@ -1365,6 +1358,7 @@ static int snp_filter_reserved_mem_regions(struct resource *rs, void *arg)

static int __sev_snp_init_locked(int *error, unsigned int max_snp_asid)
{
+ struct sev_data_range_list *snp_range_list __free(kfree) = NULL;
struct psp_device *psp = psp_master;
struct sev_data_snp_init_ex data;
struct sev_device *sev;
@@ -2351,11 +2345,10 @@ static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp, bool writable)
static int sev_ioctl_do_snp_platform_status(struct sev_issue_cmd *argp)
{
struct sev_device *sev = psp_master->sev_data;
- bool shutdown_required = false;
struct sev_data_snp_addr buf;
struct page *status_page;
- int ret, error;
void *data;
+ int ret;

if (!argp->data)
return -EINVAL;
@@ -2366,31 +2359,35 @@ static int sev_ioctl_do_snp_platform_status(struct sev_issue_cmd *argp)

data = page_address(status_page);

- if (!sev->snp_initialized) {
- ret = snp_move_to_init_state(argp, &shutdown_required);
- if (ret)
- goto cleanup;
- }
-
/*
- * Firmware expects status page to be in firmware-owned state, otherwise
- * it will report firmware error code INVALID_PAGE_STATE (0x1A).
+ * SNP_PLATFORM_STATUS can be executed in any SNP state. But if executed
+ * when SNP has been initialized, the status page must be firmware-owned.
*/
- if (rmp_mark_pages_firmware(__pa(data), 1, true)) {
- ret = -EFAULT;
- goto cleanup;
+ if (sev->snp_initialized) {
+ /*
+ * Firmware expects the status page to be in Firmware state,
+ * otherwise it will report an error INVALID_PAGE_STATE.
+ */
+ if (rmp_mark_pages_firmware(__pa(data), 1, true)) {
+ ret = -EFAULT;
+ goto cleanup;
+ }
}

buf.address = __psp_pa(data);
ret = __sev_do_cmd_locked(SEV_CMD_SNP_PLATFORM_STATUS, &buf, &argp->error);

- /*
- * Status page will be transitioned to Reclaim state upon success, or
- * left in Firmware state in failure. Use snp_reclaim_pages() to
- * transition either case back to Hypervisor-owned state.
- */
- if (snp_reclaim_pages(__pa(data), 1, true))
- return -EFAULT;
+ if (sev->snp_initialized) {
+ /*
+ * The status page will be in Reclaim state on success, or left
+ * in Firmware state on failure. Use snp_reclaim_pages() to
+ * transition either case back to Hypervisor-owned state.
+ */
+ if (snp_reclaim_pages(__pa(data), 1, true)) {
+ snp_leak_pages(__page_to_pfn(status_page), 1);
+ return -EFAULT;
+ }
+ }

if (ret)
goto cleanup;
@@ -2400,9 +2397,6 @@ static int sev_ioctl_do_snp_platform_status(struct sev_issue_cmd *argp)
ret = -EFAULT;

cleanup:
- if (shutdown_required)
- __sev_snp_shutdown_locked(&error, false);
-
__free_pages(status_page, 0);
return ret;
}
@@ -2753,11 +2747,6 @@ static void __sev_firmware_shutdown(struct sev_device *sev, bool panic)
sev_init_ex_buffer = NULL;
}

- if (snp_range_list) {
- kfree(snp_range_list);
- snp_range_list = NULL;
- }
-
__sev_snp_shutdown_locked(&error, panic);
}

diff --git a/drivers/crypto/ccp/sp-dev.c b/drivers/crypto/ccp/sp-dev.c
index 3467f6db4f50..f204aa5df96e 100644
--- a/drivers/crypto/ccp/sp-dev.c
+++ b/drivers/crypto/ccp/sp-dev.c
@@ -230,6 +230,18 @@ int sp_resume(struct sp_device *sp)
return 0;
}

+int sp_restore(struct sp_device *sp)
+{
+ if (sp->psp_data) {
+ int ret = psp_restore(sp);
+
+ if (ret)
+ return ret;
+ }
+
+ return sp_resume(sp);
+}
+
struct sp_device *sp_get_psp_master_device(void)
{
struct sp_device *i, *ret = NULL;
diff --git a/drivers/crypto/ccp/sp-dev.h b/drivers/crypto/ccp/sp-dev.h
index 6f9d7063257d..c8a611ef275b 100644
--- a/drivers/crypto/ccp/sp-dev.h
+++ b/drivers/crypto/ccp/sp-dev.h
@@ -141,6 +141,7 @@ void sp_destroy(struct sp_device *sp);

int sp_suspend(struct sp_device *sp);
int sp_resume(struct sp_device *sp);
+int sp_restore(struct sp_device *sp);
int sp_request_ccp_irq(struct sp_device *sp, irq_handler_t handler,
const char *name, void *data);
void sp_free_ccp_irq(struct sp_device *sp, void *data);
@@ -174,6 +175,7 @@ int psp_dev_init(struct sp_device *sp);
void psp_pci_init(void);
void psp_dev_destroy(struct sp_device *sp);
void psp_pci_exit(void);
+int psp_restore(struct sp_device *sp);

#else /* !CONFIG_CRYPTO_DEV_SP_PSP */

@@ -181,6 +183,7 @@ static inline int psp_dev_init(struct sp_device *sp) { return 0; }
static inline void psp_pci_init(void) { }
static inline void psp_dev_destroy(struct sp_device *sp) { }
static inline void psp_pci_exit(void) { }
+static inline int psp_restore(struct sp_device *sp) { return 0; }

#endif /* CONFIG_CRYPTO_DEV_SP_PSP */

diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c
index 8891ceee1d7d..6ac805d99ccb 100644
--- a/drivers/crypto/ccp/sp-pci.c
+++ b/drivers/crypto/ccp/sp-pci.c
@@ -353,6 +353,13 @@ static int __maybe_unused sp_pci_resume(struct device *dev)
return sp_resume(sp);
}

+static int __maybe_unused sp_pci_restore(struct device *dev)
+{
+ struct sp_device *sp = dev_get_drvdata(dev);
+
+ return sp_restore(sp);
+}
+
#ifdef CONFIG_CRYPTO_DEV_SP_PSP
static const struct sev_vdata sevv1 = {
.cmdresp_reg = 0x10580, /* C2PMSG_32 */
@@ -563,7 +570,14 @@ static const struct pci_device_id sp_pci_table[] = {
};
MODULE_DEVICE_TABLE(pci, sp_pci_table);

-static SIMPLE_DEV_PM_OPS(sp_pci_pm_ops, sp_pci_suspend, sp_pci_resume);
+static const struct dev_pm_ops sp_pci_pm_ops = {
+ .suspend = pm_sleep_ptr(sp_pci_suspend),
+ .resume = pm_sleep_ptr(sp_pci_resume),
+ .freeze = pm_sleep_ptr(sp_pci_suspend),
+ .thaw = pm_sleep_ptr(sp_pci_resume),
+ .poweroff = pm_sleep_ptr(sp_pci_suspend),
+ .restore_early = pm_sleep_ptr(sp_pci_restore),
+};

static struct pci_driver sp_pci_driver = {
.name = "ccp",
diff --git a/drivers/crypto/ccp/tee-dev.c b/drivers/crypto/ccp/tee-dev.c
index 5e1d80724678..92ffa412622a 100644
--- a/drivers/crypto/ccp/tee-dev.c
+++ b/drivers/crypto/ccp/tee-dev.c
@@ -86,10 +86,34 @@ static inline void tee_free_cmd_buffer(struct tee_init_ring_cmd *cmd)
kfree(cmd);
}

+static bool tee_send_destroy_cmd(struct psp_tee_device *tee)
+{
+ unsigned int reg;
+ int ret;
+
+ ret = psp_mailbox_command(tee->psp, PSP_CMD_TEE_RING_DESTROY, NULL,
+ TEE_DEFAULT_CMD_TIMEOUT, &reg);
+ if (ret) {
+ dev_err(tee->dev, "tee: ring destroy command timed out, disabling TEE support\n");
+ psp_dead = true;
+ return false;
+ }
+
+ if (FIELD_GET(PSP_CMDRESP_STS, reg)) {
+ dev_err(tee->dev, "tee: ring destroy command failed (%#010lx)\n",
+ FIELD_GET(PSP_CMDRESP_STS, reg));
+ psp_dead = true;
+ return false;
+ }
+
+ return true;
+}
+
static int tee_init_ring(struct psp_tee_device *tee)
{
int ring_size = MAX_RING_BUFFER_ENTRIES * sizeof(struct tee_ring_cmd);
struct tee_init_ring_cmd *cmd;
+ bool retry = false;
unsigned int reg;
int ret;

@@ -112,6 +136,7 @@ static int tee_init_ring(struct psp_tee_device *tee)
/* Send command buffer details to Trusted OS by writing to
* CPU-PSP message registers
*/
+retry_init:
ret = psp_mailbox_command(tee->psp, PSP_CMD_TEE_RING_INIT, cmd,
TEE_DEFAULT_CMD_TIMEOUT, &reg);
if (ret) {
@@ -122,9 +147,22 @@ static int tee_init_ring(struct psp_tee_device *tee)
}

if (FIELD_GET(PSP_CMDRESP_STS, reg)) {
+ /*
+ * During the hibernate resume sequence driver may have gotten loaded
+ * but the ring not properly destroyed. If the ring doesn't work, try
+ * to destroy and re-init once.
+ */
+ if (!retry && FIELD_GET(PSP_CMDRESP_STS, reg) == PSP_TEE_STS_RING_BUSY) {
+ dev_info(tee->dev, "tee: ring init command failed with busy status, retrying\n");
+ if (tee_send_destroy_cmd(tee)) {
+ retry = true;
+ goto retry_init;
+ }
+ }
dev_err(tee->dev, "tee: ring init command failed (%#010lx)\n",
FIELD_GET(PSP_CMDRESP_STS, reg));
tee_free_ring(tee);
+ psp_dead = true;
ret = -EIO;
}

@@ -136,24 +174,13 @@ static int tee_init_ring(struct psp_tee_device *tee)

static void tee_destroy_ring(struct psp_tee_device *tee)
{
- unsigned int reg;
- int ret;
-
if (!tee->rb_mgr.ring_start)
return;

if (psp_dead)
goto free_ring;

- ret = psp_mailbox_command(tee->psp, PSP_CMD_TEE_RING_DESTROY, NULL,
- TEE_DEFAULT_CMD_TIMEOUT, &reg);
- if (ret) {
- dev_err(tee->dev, "tee: ring destroy command timed out, disabling TEE support\n");
- psp_dead = true;
- } else if (FIELD_GET(PSP_CMDRESP_STS, reg)) {
- dev_err(tee->dev, "tee: ring destroy command failed (%#010lx)\n",
- FIELD_GET(PSP_CMDRESP_STS, reg));
- }
+ tee_send_destroy_cmd(tee);

free_ring:
tee_free_ring(tee);
@@ -365,3 +392,8 @@ int psp_check_tee_status(void)
return 0;
}
EXPORT_SYMBOL(psp_check_tee_status);
+
+int tee_restore(struct psp_device *psp)
+{
+ return tee_init_ring(psp->tee_data);
+}
diff --git a/drivers/crypto/ccp/tee-dev.h b/drivers/crypto/ccp/tee-dev.h
index ea9a2b7c05f5..c23416cb7bb3 100644
--- a/drivers/crypto/ccp/tee-dev.h
+++ b/drivers/crypto/ccp/tee-dev.h
@@ -111,5 +111,6 @@ struct tee_ring_cmd {

int tee_dev_init(struct psp_device *psp);
void tee_dev_destroy(struct psp_device *psp);
+int tee_restore(struct psp_device *psp);

#endif /* __TEE_DEV_H__ */
diff --git a/drivers/crypto/hisilicon/Kconfig b/drivers/crypto/hisilicon/Kconfig
index 4835bdebdbb3..a0cb1a8186ac 100644
--- a/drivers/crypto/hisilicon/Kconfig
+++ b/drivers/crypto/hisilicon/Kconfig
@@ -57,6 +57,7 @@ config CRYPTO_DEV_HISI_ZIP
depends on UACCE || UACCE=n
depends on ACPI
select CRYPTO_DEV_HISI_QM
+ select CRYPTO_DEFLATE
help
Support for HiSilicon ZIP Driver

diff --git a/drivers/crypto/hisilicon/hpre/hpre.h b/drivers/crypto/hisilicon/hpre/hpre.h
index 0f3ddbadbcf9..021dbd9a1d48 100644
--- a/drivers/crypto/hisilicon/hpre/hpre.h
+++ b/drivers/crypto/hisilicon/hpre/hpre.h
@@ -94,9 +94,8 @@ struct hpre_sqe {
__le64 key;
__le64 in;
__le64 out;
- __le16 tag;
- __le16 resv2;
-#define _HPRE_SQE_ALIGN_EXT 7
+ __le64 tag;
+#define _HPRE_SQE_ALIGN_EXT 6
__le32 rsvd1[_HPRE_SQE_ALIGN_EXT];
};

diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
index 21ccf879f70c..839c1f677143 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
@@ -93,6 +93,7 @@ struct hpre_dh_ctx {

char *g; /* m */
dma_addr_t dma_g;
+ struct crypto_kpp *soft_tfm;
};

struct hpre_ecdh_ctx {
@@ -103,17 +104,15 @@ struct hpre_ecdh_ctx {
/* low address: x->y */
unsigned char *g;
dma_addr_t dma_g;
+ struct crypto_kpp *soft_tfm;
};

struct hpre_ctx {
struct hisi_qp *qp;
struct device *dev;
- struct hpre_asym_request **req_list;
struct hpre *hpre;
- spinlock_t req_lock;
unsigned int key_sz;
bool crt_g2_mode;
- struct idr req_idr;
union {
struct hpre_rsa_ctx rsa;
struct hpre_dh_ctx dh;
@@ -123,6 +122,7 @@ struct hpre_ctx {
unsigned int curve_id;
/* for high performance core */
u8 enable_hpcore;
+ bool fallback;
};

struct hpre_asym_request {
@@ -136,7 +136,6 @@ struct hpre_asym_request {
struct kpp_request *ecdh;
} areq;
int err;
- int req_id;
hpre_cb cb;
struct timespec64 req_time;
};
@@ -151,79 +150,13 @@ static inline unsigned int hpre_align_pd(void)
return (hpre_align_sz() - 1) & ~(crypto_tfm_ctx_alignment() - 1);
}

-static int hpre_alloc_req_id(struct hpre_ctx *ctx)
+static void hpre_dfx_add_req_time(struct hpre_asym_request *hpre_req)
{
- unsigned long flags;
- int id;
-
- spin_lock_irqsave(&ctx->req_lock, flags);
- id = idr_alloc(&ctx->req_idr, NULL, 0, ctx->qp->sq_depth, GFP_ATOMIC);
- spin_unlock_irqrestore(&ctx->req_lock, flags);
-
- return id;
-}
-
-static void hpre_free_req_id(struct hpre_ctx *ctx, int req_id)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&ctx->req_lock, flags);
- idr_remove(&ctx->req_idr, req_id);
- spin_unlock_irqrestore(&ctx->req_lock, flags);
-}
-
-static int hpre_add_req_to_ctx(struct hpre_asym_request *hpre_req)
-{
- struct hpre_ctx *ctx;
- struct hpre_dfx *dfx;
- int id;
-
- ctx = hpre_req->ctx;
- id = hpre_alloc_req_id(ctx);
- if (unlikely(id < 0))
- return -EINVAL;
-
- ctx->req_list[id] = hpre_req;
- hpre_req->req_id = id;
+ struct hpre_ctx *ctx = hpre_req->ctx;
+ struct hpre_dfx *dfx = ctx->hpre->debug.dfx;

- dfx = ctx->hpre->debug.dfx;
if (atomic64_read(&dfx[HPRE_OVERTIME_THRHLD].value))
ktime_get_ts64(&hpre_req->req_time);
-
- return id;
-}
-
-static void hpre_rm_req_from_ctx(struct hpre_asym_request *hpre_req)
-{
- struct hpre_ctx *ctx = hpre_req->ctx;
- int id = hpre_req->req_id;
-
- if (hpre_req->req_id >= 0) {
- hpre_req->req_id = HPRE_INVLD_REQ_ID;
- ctx->req_list[id] = NULL;
- hpre_free_req_id(ctx, id);
- }
-}
-
-static struct hisi_qp *hpre_get_qp_and_start(u8 type)
-{
- struct hisi_qp *qp;
- int ret;
-
- qp = hpre_create_qp(type);
- if (!qp) {
- pr_err("Can not create hpre qp!\n");
- return ERR_PTR(-ENODEV);
- }
-
- ret = hisi_qm_start_qp(qp, 0);
- if (ret < 0) {
- hisi_qm_free_qps(&qp, 1);
- pci_err(qp->qm->pdev, "Can not start qp!\n");
- return ERR_PTR(-EINVAL);
- }
-
- return qp;
}

static int hpre_get_data_dma_addr(struct hpre_asym_request *hpre_req,
@@ -340,26 +273,19 @@ static void hpre_hw_data_clr_all(struct hpre_ctx *ctx,
static int hpre_alg_res_post_hf(struct hpre_ctx *ctx, struct hpre_sqe *sqe,
void **kreq)
{
- struct hpre_asym_request *req;
unsigned int err, done, alg;
- int id;

#define HPRE_NO_HW_ERR 0
#define HPRE_HW_TASK_DONE 3
#define HREE_HW_ERR_MASK GENMASK(10, 0)
#define HREE_SQE_DONE_MASK GENMASK(1, 0)
#define HREE_ALG_TYPE_MASK GENMASK(4, 0)
- id = (int)le16_to_cpu(sqe->tag);
- req = ctx->req_list[id];
- hpre_rm_req_from_ctx(req);
- *kreq = req;
+ *kreq = (void *)le64_to_cpu(sqe->tag);

err = (le32_to_cpu(sqe->dw0) >> HPRE_SQE_ALG_BITS) &
HREE_HW_ERR_MASK;
-
done = (le32_to_cpu(sqe->dw0) >> HPRE_SQE_DONE_SHIFT) &
HREE_SQE_DONE_MASK;
-
if (likely(err == HPRE_NO_HW_ERR && done == HPRE_HW_TASK_DONE))
return 0;

@@ -370,36 +296,10 @@ static int hpre_alg_res_post_hf(struct hpre_ctx *ctx, struct hpre_sqe *sqe,
return -EINVAL;
}

-static int hpre_ctx_set(struct hpre_ctx *ctx, struct hisi_qp *qp, int qlen)
-{
- struct hpre *hpre;
-
- if (!ctx || !qp || qlen < 0)
- return -EINVAL;
-
- spin_lock_init(&ctx->req_lock);
- ctx->qp = qp;
- ctx->dev = &qp->qm->pdev->dev;
-
- hpre = container_of(ctx->qp->qm, struct hpre, qm);
- ctx->hpre = hpre;
- ctx->req_list = kcalloc(qlen, sizeof(void *), GFP_KERNEL);
- if (!ctx->req_list)
- return -ENOMEM;
- ctx->key_sz = 0;
- ctx->crt_g2_mode = false;
- idr_init(&ctx->req_idr);
-
- return 0;
-}
-
static void hpre_ctx_clear(struct hpre_ctx *ctx, bool is_clear_all)
{
- if (is_clear_all) {
- idr_destroy(&ctx->req_idr);
- kfree(ctx->req_list);
+ if (is_clear_all)
hisi_qm_free_qps(&ctx->qp, 1);
- }

ctx->crt_g2_mode = false;
ctx->key_sz = 0;
@@ -467,49 +367,44 @@ static void hpre_rsa_cb(struct hpre_ctx *ctx, void *resp)

static void hpre_alg_cb(struct hisi_qp *qp, void *resp)
{
- struct hpre_ctx *ctx = qp->qp_ctx;
- struct hpre_dfx *dfx = ctx->hpre->debug.dfx;
+ struct hpre_asym_request *h_req;
struct hpre_sqe *sqe = resp;
- struct hpre_asym_request *req = ctx->req_list[le16_to_cpu(sqe->tag)];

- if (unlikely(!req)) {
- atomic64_inc(&dfx[HPRE_INVALID_REQ_CNT].value);
+ h_req = (struct hpre_asym_request *)le64_to_cpu(sqe->tag);
+ if (unlikely(!h_req)) {
+ pr_err("Failed to get request, and qp_id is %u\n", qp->qp_id);
return;
}

- req->cb(ctx, resp);
-}
-
-static void hpre_stop_qp_and_put(struct hisi_qp *qp)
-{
- hisi_qm_stop_qp(qp);
- hisi_qm_free_qps(&qp, 1);
+ h_req->cb(h_req->ctx, resp);
}

static int hpre_ctx_init(struct hpre_ctx *ctx, u8 type)
{
struct hisi_qp *qp;
- int ret;
+ struct hpre *hpre;

- qp = hpre_get_qp_and_start(type);
- if (IS_ERR(qp))
- return PTR_ERR(qp);
+ qp = hpre_create_qp(type);
+ if (!qp) {
+ ctx->qp = NULL;
+ return -ENODEV;
+ }

- qp->qp_ctx = ctx;
qp->req_cb = hpre_alg_cb;
+ ctx->qp = qp;
+ ctx->dev = &qp->qm->pdev->dev;
+ hpre = container_of(ctx->qp->qm, struct hpre, qm);
+ ctx->hpre = hpre;
+ ctx->key_sz = 0;
+ ctx->crt_g2_mode = false;

- ret = hpre_ctx_set(ctx, qp, qp->sq_depth);
- if (ret)
- hpre_stop_qp_and_put(qp);
-
- return ret;
+ return 0;
}

static int hpre_msg_request_set(struct hpre_ctx *ctx, void *req, bool is_rsa)
{
struct hpre_asym_request *h_req;
struct hpre_sqe *msg;
- int req_id;
void *tmp;

if (is_rsa) {
@@ -549,11 +444,8 @@ static int hpre_msg_request_set(struct hpre_ctx *ctx, void *req, bool is_rsa)
msg->task_len1 = (ctx->key_sz >> HPRE_BITS_2_BYTES_SHIFT) - 1;
h_req->ctx = ctx;

- req_id = hpre_add_req_to_ctx(h_req);
- if (req_id < 0)
- return -EBUSY;
-
- msg->tag = cpu_to_le16((u16)req_id);
+ hpre_dfx_add_req_time(h_req);
+ msg->tag = cpu_to_le64((uintptr_t)h_req);

return 0;
}
@@ -566,9 +458,7 @@ static int hpre_send(struct hpre_ctx *ctx, struct hpre_sqe *msg)

do {
atomic64_inc(&dfx[HPRE_SEND_CNT].value);
- spin_lock_bh(&ctx->req_lock);
ret = hisi_qp_send(ctx->qp, msg);
- spin_unlock_bh(&ctx->req_lock);
if (ret != -EBUSY)
break;
atomic64_inc(&dfx[HPRE_SEND_BUSY_CNT].value);
@@ -619,12 +509,53 @@ static int hpre_dh_compute_value(struct kpp_request *req)
return -EINPROGRESS;

clear_all:
- hpre_rm_req_from_ctx(hpre_req);
hpre_hw_data_clr_all(ctx, hpre_req, req->dst, req->src);

return ret;
}

+static struct kpp_request *hpre_dh_prepare_fb_req(struct kpp_request *req)
+{
+ struct kpp_request *fb_req = kpp_request_ctx(req);
+ struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+ struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
+
+ kpp_request_set_tfm(fb_req, ctx->dh.soft_tfm);
+ kpp_request_set_callback(fb_req, req->base.flags, req->base.complete, req->base.data);
+ kpp_request_set_input(fb_req, req->src, req->src_len);
+ kpp_request_set_output(fb_req, req->dst, req->dst_len);
+
+ return fb_req;
+}
+
+static int hpre_dh_generate_public_key(struct kpp_request *req)
+{
+ struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+ struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
+ struct kpp_request *fb_req;
+
+ if (ctx->fallback) {
+ fb_req = hpre_dh_prepare_fb_req(req);
+ return crypto_kpp_generate_public_key(fb_req);
+ }
+
+ return hpre_dh_compute_value(req);
+}
+
+static int hpre_dh_compute_shared_secret(struct kpp_request *req)
+{
+ struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+ struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
+ struct kpp_request *fb_req;
+
+ if (ctx->fallback) {
+ fb_req = hpre_dh_prepare_fb_req(req);
+ return crypto_kpp_compute_shared_secret(fb_req);
+ }
+
+ return hpre_dh_compute_value(req);
+}
+
static int hpre_is_dh_params_length_valid(unsigned int key_sz)
{
#define _HPRE_DH_GRP1 768
@@ -651,13 +582,6 @@ static int hpre_dh_set_params(struct hpre_ctx *ctx, struct dh *params)
struct device *dev = ctx->dev;
unsigned int sz;

- if (params->p_size > HPRE_DH_MAX_P_SZ)
- return -EINVAL;
-
- if (hpre_is_dh_params_length_valid(params->p_size <<
- HPRE_BITS_2_BYTES_SHIFT))
- return -EINVAL;
-
sz = ctx->key_sz = params->p_size;
ctx->dh.xa_p = dma_alloc_coherent(dev, sz << 1,
&ctx->dh.dma_xa_p, GFP_KERNEL);
@@ -690,8 +614,8 @@ static void hpre_dh_clear_ctx(struct hpre_ctx *ctx, bool is_clear_all)
struct device *dev = ctx->dev;
unsigned int sz = ctx->key_sz;

- if (is_clear_all)
- hisi_qm_stop_qp(ctx->qp);
+ if (!ctx->qp)
+ return;

if (ctx->dh.g) {
dma_free_coherent(dev, sz, ctx->dh.g, ctx->dh.dma_g);
@@ -718,6 +642,13 @@ static int hpre_dh_set_secret(struct crypto_kpp *tfm, const void *buf,
if (crypto_dh_decode_key(buf, len, &params) < 0)
return -EINVAL;

+ if (!ctx->qp)
+ goto set_soft_secret;
+
+ if (hpre_is_dh_params_length_valid(params.p_size <<
+ HPRE_BITS_2_BYTES_SHIFT))
+ goto set_soft_secret;
+
/* Free old secret if any */
hpre_dh_clear_ctx(ctx, false);

@@ -728,27 +659,55 @@ static int hpre_dh_set_secret(struct crypto_kpp *tfm, const void *buf,
memcpy(ctx->dh.xa_p + (ctx->key_sz - params.key_size), params.key,
params.key_size);

+ ctx->fallback = false;
return 0;

err_clear_ctx:
hpre_dh_clear_ctx(ctx, false);
return ret;
+set_soft_secret:
+ ctx->fallback = true;
+ return crypto_kpp_set_secret(ctx->dh.soft_tfm, buf, len);
}

static unsigned int hpre_dh_max_size(struct crypto_kpp *tfm)
{
struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);

+ if (ctx->fallback)
+ return crypto_kpp_maxsize(ctx->dh.soft_tfm);
+
return ctx->key_sz;
}

static int hpre_dh_init_tfm(struct crypto_kpp *tfm)
{
struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
+ const char *alg = kpp_alg_name(tfm);
+ unsigned int reqsize;
+ int ret;
+
+ ctx->dh.soft_tfm = crypto_alloc_kpp(alg, 0, CRYPTO_ALG_NEED_FALLBACK);
+ if (IS_ERR(ctx->dh.soft_tfm)) {
+ pr_err("Failed to alloc dh tfm!\n");
+ return PTR_ERR(ctx->dh.soft_tfm);
+ }

- kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + hpre_align_pd());
+ crypto_kpp_set_flags(ctx->dh.soft_tfm, crypto_kpp_get_flags(tfm));

- return hpre_ctx_init(ctx, HPRE_V2_ALG_TYPE);
+ reqsize = max(sizeof(struct hpre_asym_request) + hpre_align_pd(),
+ sizeof(struct kpp_request) + crypto_kpp_reqsize(ctx->dh.soft_tfm));
+ kpp_set_reqsize(tfm, reqsize);
+
+ ret = hpre_ctx_init(ctx, HPRE_V2_ALG_TYPE);
+ if (ret && ret != -ENODEV) {
+ crypto_free_kpp(ctx->dh.soft_tfm);
+ return ret;
+ } else if (ret == -ENODEV) {
+ ctx->fallback = true;
+ }
+
+ return 0;
}

static void hpre_dh_exit_tfm(struct crypto_kpp *tfm)
@@ -756,6 +715,7 @@ static void hpre_dh_exit_tfm(struct crypto_kpp *tfm)
struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);

hpre_dh_clear_ctx(ctx, true);
+ crypto_free_kpp(ctx->dh.soft_tfm);
}

static void hpre_rsa_drop_leading_zeros(const char **ptr, size_t *len)
@@ -795,9 +755,8 @@ static int hpre_rsa_enc(struct akcipher_request *req)
struct hpre_sqe *msg = &hpre_req->req;
int ret;

- /* For 512 and 1536 bits key size, use soft tfm instead */
- if (ctx->key_sz == HPRE_RSA_512BITS_KSZ ||
- ctx->key_sz == HPRE_RSA_1536BITS_KSZ) {
+ /* For unsupported key size and unavailable devices, use soft tfm instead */
+ if (ctx->fallback) {
akcipher_request_set_tfm(req, ctx->rsa.soft_tfm);
ret = crypto_akcipher_encrypt(req);
akcipher_request_set_tfm(req, tfm);
@@ -828,7 +787,6 @@ static int hpre_rsa_enc(struct akcipher_request *req)
return -EINPROGRESS;

clear_all:
- hpre_rm_req_from_ctx(hpre_req);
hpre_hw_data_clr_all(ctx, hpre_req, req->dst, req->src);

return ret;
@@ -843,9 +801,8 @@ static int hpre_rsa_dec(struct akcipher_request *req)
struct hpre_sqe *msg = &hpre_req->req;
int ret;

- /* For 512 and 1536 bits key size, use soft tfm instead */
- if (ctx->key_sz == HPRE_RSA_512BITS_KSZ ||
- ctx->key_sz == HPRE_RSA_1536BITS_KSZ) {
+ /* For unsupported key size and unavailable devices, use soft tfm instead */
+ if (ctx->fallback) {
akcipher_request_set_tfm(req, ctx->rsa.soft_tfm);
ret = crypto_akcipher_decrypt(req);
akcipher_request_set_tfm(req, tfm);
@@ -883,7 +840,6 @@ static int hpre_rsa_dec(struct akcipher_request *req)
return -EINPROGRESS;

clear_all:
- hpre_rm_req_from_ctx(hpre_req);
hpre_hw_data_clr_all(ctx, hpre_req, req->dst, req->src);

return ret;
@@ -899,8 +855,10 @@ static int hpre_rsa_set_n(struct hpre_ctx *ctx, const char *value,
ctx->key_sz = vlen;

/* if invalid key size provided, we use software tfm */
- if (!hpre_rsa_key_size_is_support(ctx->key_sz))
+ if (!hpre_rsa_key_size_is_support(ctx->key_sz)) {
+ ctx->fallback = true;
return 0;
+ }

ctx->rsa.pubkey = dma_alloc_coherent(ctx->dev, vlen << 1,
&ctx->rsa.dma_pubkey,
@@ -1035,8 +993,8 @@ static void hpre_rsa_clear_ctx(struct hpre_ctx *ctx, bool is_clear_all)
unsigned int half_key_sz = ctx->key_sz >> 1;
struct device *dev = ctx->dev;

- if (is_clear_all)
- hisi_qm_stop_qp(ctx->qp);
+ if (!ctx->qp)
+ return;

if (ctx->rsa.pubkey) {
dma_free_coherent(dev, ctx->key_sz << 1,
@@ -1117,6 +1075,7 @@ static int hpre_rsa_setkey(struct hpre_ctx *ctx, const void *key,
goto free;
}

+ ctx->fallback = false;
return 0;

free:
@@ -1134,6 +1093,9 @@ static int hpre_rsa_setpubkey(struct crypto_akcipher *tfm, const void *key,
if (ret)
return ret;

+ if (!ctx->qp)
+ return 0;
+
return hpre_rsa_setkey(ctx, key, keylen, false);
}

@@ -1147,6 +1109,9 @@ static int hpre_rsa_setprivkey(struct crypto_akcipher *tfm, const void *key,
if (ret)
return ret;

+ if (!ctx->qp)
+ return 0;
+
return hpre_rsa_setkey(ctx, key, keylen, true);
}

@@ -1154,9 +1119,8 @@ static unsigned int hpre_rsa_max_size(struct crypto_akcipher *tfm)
{
struct hpre_ctx *ctx = akcipher_tfm_ctx(tfm);

- /* For 512 and 1536 bits key size, use soft tfm instead */
- if (ctx->key_sz == HPRE_RSA_512BITS_KSZ ||
- ctx->key_sz == HPRE_RSA_1536BITS_KSZ)
+ /* For unsupported key size and unavailable devices, use soft tfm instead */
+ if (ctx->fallback)
return crypto_akcipher_maxsize(ctx->rsa.soft_tfm);

return ctx->key_sz;
@@ -1177,10 +1141,14 @@ static int hpre_rsa_init_tfm(struct crypto_akcipher *tfm)
hpre_align_pd());

ret = hpre_ctx_init(ctx, HPRE_V2_ALG_TYPE);
- if (ret)
+ if (ret && ret != -ENODEV) {
crypto_free_akcipher(ctx->rsa.soft_tfm);
+ return ret;
+ } else if (ret == -ENODEV) {
+ ctx->fallback = true;
+ }

- return ret;
+ return 0;
}

static void hpre_rsa_exit_tfm(struct crypto_akcipher *tfm)
@@ -1207,9 +1175,6 @@ static void hpre_ecc_clear_ctx(struct hpre_ctx *ctx, bool is_clear_all)
unsigned int sz = ctx->key_sz;
unsigned int shift = sz << 1;

- if (is_clear_all)
- hisi_qm_stop_qp(ctx->qp);
-
if (ctx->ecdh.p) {
/* ecdh: p->a->k->b */
memzero_explicit(ctx->ecdh.p + shift, sz);
@@ -1346,7 +1311,7 @@ static int hpre_ecdh_set_param(struct hpre_ctx *ctx, struct ecdh *params)
return 0;
}

-static bool hpre_key_is_zero(char *key, unsigned short key_sz)
+static bool hpre_key_is_zero(const char *key, unsigned short key_sz)
{
int i;

@@ -1387,6 +1352,9 @@ static int hpre_ecdh_set_secret(struct crypto_kpp *tfm, const void *buf,
struct ecdh params;
int ret;

+ if (ctx->fallback)
+ return crypto_kpp_set_secret(ctx->ecdh.soft_tfm, buf, len);
+
if (crypto_ecdh_decode_key(buf, len, &params) < 0) {
dev_err(dev, "failed to decode ecdh key!\n");
return -EINVAL;
@@ -1488,7 +1456,6 @@ static int hpre_ecdh_msg_request_set(struct hpre_ctx *ctx,
{
struct hpre_asym_request *h_req;
struct hpre_sqe *msg;
- int req_id;
void *tmp;

if (req->dst_len < ctx->key_sz << 1) {
@@ -1510,11 +1477,8 @@ static int hpre_ecdh_msg_request_set(struct hpre_ctx *ctx,
msg->task_len1 = (ctx->key_sz >> HPRE_BITS_2_BYTES_SHIFT) - 1;
h_req->ctx = ctx;

- req_id = hpre_add_req_to_ctx(h_req);
- if (req_id < 0)
- return -EBUSY;
-
- msg->tag = cpu_to_le16((u16)req_id);
+ hpre_dfx_add_req_time(h_req);
+ msg->tag = cpu_to_le64((uintptr_t)h_req);
return 0;
}

@@ -1612,28 +1576,86 @@ static int hpre_ecdh_compute_value(struct kpp_request *req)
return -EINPROGRESS;

clear_all:
- hpre_rm_req_from_ctx(hpre_req);
hpre_ecdh_hw_data_clr_all(ctx, hpre_req, req->dst, req->src);
return ret;
}

+static int hpre_ecdh_generate_public_key(struct kpp_request *req)
+{
+ struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+ struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
+ int ret;
+
+ if (ctx->fallback) {
+ kpp_request_set_tfm(req, ctx->ecdh.soft_tfm);
+ ret = crypto_kpp_generate_public_key(req);
+ kpp_request_set_tfm(req, tfm);
+ return ret;
+ }
+
+ return hpre_ecdh_compute_value(req);
+}
+
+static int hpre_ecdh_compute_shared_secret(struct kpp_request *req)
+{
+ struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+ struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
+ int ret;
+
+ if (ctx->fallback) {
+ kpp_request_set_tfm(req, ctx->ecdh.soft_tfm);
+ ret = crypto_kpp_compute_shared_secret(req);
+ kpp_request_set_tfm(req, tfm);
+ return ret;
+ }
+
+ return hpre_ecdh_compute_value(req);
+}
+
static unsigned int hpre_ecdh_max_size(struct crypto_kpp *tfm)
{
struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);

+ if (ctx->fallback)
+ return crypto_kpp_maxsize(ctx->ecdh.soft_tfm);
+
/* max size is the pub_key_size, include x and y */
return ctx->key_sz << 1;
}

+static int hpre_ecdh_init_tfm(struct crypto_kpp *tfm)
+{
+ struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
+ const char *alg = kpp_alg_name(tfm);
+ int ret;
+
+ ret = hpre_ctx_init(ctx, HPRE_V3_ECC_ALG_TYPE);
+ if (!ret) {
+ kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + hpre_align_pd());
+ return 0;
+ } else if (ret && ret != -ENODEV) {
+ return ret;
+ }
+
+ ctx->ecdh.soft_tfm = crypto_alloc_kpp(alg, 0, CRYPTO_ALG_NEED_FALLBACK);
+ if (IS_ERR(ctx->ecdh.soft_tfm)) {
+ pr_err("Failed to alloc %s tfm!\n", alg);
+ return PTR_ERR(ctx->ecdh.soft_tfm);
+ }
+
+ crypto_kpp_set_flags(ctx->ecdh.soft_tfm, crypto_kpp_get_flags(tfm));
+ ctx->fallback = true;
+
+ return 0;
+}
+
static int hpre_ecdh_nist_p192_init_tfm(struct crypto_kpp *tfm)
{
struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);

ctx->curve_id = ECC_CURVE_NIST_P192;

- kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + hpre_align_pd());
-
- return hpre_ctx_init(ctx, HPRE_V3_ECC_ALG_TYPE);
+ return hpre_ecdh_init_tfm(tfm);
}

static int hpre_ecdh_nist_p256_init_tfm(struct crypto_kpp *tfm)
@@ -1643,9 +1665,7 @@ static int hpre_ecdh_nist_p256_init_tfm(struct crypto_kpp *tfm)
ctx->curve_id = ECC_CURVE_NIST_P256;
ctx->enable_hpcore = 1;

- kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + hpre_align_pd());
-
- return hpre_ctx_init(ctx, HPRE_V3_ECC_ALG_TYPE);
+ return hpre_ecdh_init_tfm(tfm);
}

static int hpre_ecdh_nist_p384_init_tfm(struct crypto_kpp *tfm)
@@ -1654,15 +1674,18 @@ static int hpre_ecdh_nist_p384_init_tfm(struct crypto_kpp *tfm)

ctx->curve_id = ECC_CURVE_NIST_P384;

- kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + hpre_align_pd());
-
- return hpre_ctx_init(ctx, HPRE_V3_ECC_ALG_TYPE);
+ return hpre_ecdh_init_tfm(tfm);
}

static void hpre_ecdh_exit_tfm(struct crypto_kpp *tfm)
{
struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);

+ if (ctx->fallback) {
+ crypto_free_kpp(ctx->ecdh.soft_tfm);
+ return;
+ }
+
hpre_ecc_clear_ctx(ctx, true);
}

@@ -1680,13 +1703,14 @@ static struct akcipher_alg rsa = {
.cra_name = "rsa",
.cra_driver_name = "hpre-rsa",
.cra_module = THIS_MODULE,
+ .cra_flags = CRYPTO_ALG_NEED_FALLBACK,
},
};

static struct kpp_alg dh = {
.set_secret = hpre_dh_set_secret,
- .generate_public_key = hpre_dh_compute_value,
- .compute_shared_secret = hpre_dh_compute_value,
+ .generate_public_key = hpre_dh_generate_public_key,
+ .compute_shared_secret = hpre_dh_compute_shared_secret,
.max_size = hpre_dh_max_size,
.init = hpre_dh_init_tfm,
.exit = hpre_dh_exit_tfm,
@@ -1696,14 +1720,15 @@ static struct kpp_alg dh = {
.cra_name = "dh",
.cra_driver_name = "hpre-dh",
.cra_module = THIS_MODULE,
+ .cra_flags = CRYPTO_ALG_NEED_FALLBACK,
},
};

static struct kpp_alg ecdh_curves[] = {
{
.set_secret = hpre_ecdh_set_secret,
- .generate_public_key = hpre_ecdh_compute_value,
- .compute_shared_secret = hpre_ecdh_compute_value,
+ .generate_public_key = hpre_ecdh_generate_public_key,
+ .compute_shared_secret = hpre_ecdh_compute_shared_secret,
.max_size = hpre_ecdh_max_size,
.init = hpre_ecdh_nist_p192_init_tfm,
.exit = hpre_ecdh_exit_tfm,
@@ -1713,11 +1738,12 @@ static struct kpp_alg ecdh_curves[] = {
.cra_name = "ecdh-nist-p192",
.cra_driver_name = "hpre-ecdh-nist-p192",
.cra_module = THIS_MODULE,
+ .cra_flags = CRYPTO_ALG_NEED_FALLBACK,
},
}, {
.set_secret = hpre_ecdh_set_secret,
- .generate_public_key = hpre_ecdh_compute_value,
- .compute_shared_secret = hpre_ecdh_compute_value,
+ .generate_public_key = hpre_ecdh_generate_public_key,
+ .compute_shared_secret = hpre_ecdh_compute_shared_secret,
.max_size = hpre_ecdh_max_size,
.init = hpre_ecdh_nist_p256_init_tfm,
.exit = hpre_ecdh_exit_tfm,
@@ -1727,11 +1753,12 @@ static struct kpp_alg ecdh_curves[] = {
.cra_name = "ecdh-nist-p256",
.cra_driver_name = "hpre-ecdh-nist-p256",
.cra_module = THIS_MODULE,
+ .cra_flags = CRYPTO_ALG_NEED_FALLBACK,
},
}, {
.set_secret = hpre_ecdh_set_secret,
- .generate_public_key = hpre_ecdh_compute_value,
- .compute_shared_secret = hpre_ecdh_compute_value,
+ .generate_public_key = hpre_ecdh_generate_public_key,
+ .compute_shared_secret = hpre_ecdh_compute_shared_secret,
.max_size = hpre_ecdh_max_size,
.init = hpre_ecdh_nist_p384_init_tfm,
.exit = hpre_ecdh_exit_tfm,
@@ -1741,6 +1768,7 @@ static struct kpp_alg ecdh_curves[] = {
.cra_name = "ecdh-nist-p384",
.cra_driver_name = "hpre-ecdh-nist-p384",
.cra_module = THIS_MODULE,
+ .cra_flags = CRYPTO_ALG_NEED_FALLBACK,
},
}
};
diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c
index b94fecd765ee..884d5d0afaf4 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_main.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_main.c
@@ -465,7 +465,7 @@ struct hisi_qp *hpre_create_qp(u8 type)
* type: 0 - RSA/DH. algorithm supported in V2,
* 1 - ECC algorithm in V3.
*/
- ret = hisi_qm_alloc_qps_node(&hpre_devices, 1, type, node, &qp);
+ ret = hisi_qm_alloc_qps_node(&hpre_devices, 1, &type, node, &qp);
if (!ret)
return qp;

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 0968304c0cb5..a7c8839180ee 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -2210,6 +2210,7 @@ static void qp_stop_fail_cb(struct hisi_qp *qp)
for (i = 0; i < qp_used; i++) {
pos = (i + cur_head) % sq_depth;
qp->req_cb(qp, qp->sqe + (u32)(qm->sqe_size * pos));
+ qm_cq_head_update(qp);
atomic_dec(&qp->qp_status.used);
}
}
@@ -2359,25 +2360,33 @@ EXPORT_SYMBOL_GPL(hisi_qm_stop_qp);
int hisi_qp_send(struct hisi_qp *qp, const void *msg)
{
struct hisi_qp_status *qp_status = &qp->qp_status;
- u16 sq_tail = qp_status->sq_tail;
- u16 sq_tail_next = (sq_tail + 1) % qp->sq_depth;
- void *sqe = qm_get_avail_sqe(qp);
+ u16 sq_tail, sq_tail_next;
+ void *sqe;

+ spin_lock_bh(&qp->qp_lock);
if (unlikely(atomic_read(&qp->qp_status.flags) == QP_STOP ||
atomic_read(&qp->qm->status.flags) == QM_STOP ||
qp->is_resetting)) {
+ spin_unlock_bh(&qp->qp_lock);
dev_info_ratelimited(&qp->qm->pdev->dev, "QP is stopped or resetting\n");
return -EAGAIN;
}

- if (!sqe)
+ sqe = qm_get_avail_sqe(qp);
+ if (!sqe) {
+ spin_unlock_bh(&qp->qp_lock);
return -EBUSY;
+ }

+ sq_tail = qp_status->sq_tail;
+ sq_tail_next = (sq_tail + 1) % qp->sq_depth;
memcpy(sqe, msg, qp->qm->sqe_size);
+ qp->msg[sq_tail] = msg;

qm_db(qp->qm, qp->qp_id, QM_DOORBELL_CMD_SQ, sq_tail_next, 0);
atomic_inc(&qp->qp_status.used);
qp_status->sq_tail = sq_tail_next;
+ spin_unlock_bh(&qp->qp_lock);

return 0;
}
@@ -2907,12 +2916,13 @@ EXPORT_SYMBOL_GPL(hisi_qm_wait_task_finish);
static void hisi_qp_memory_uninit(struct hisi_qm *qm, int num)
{
struct device *dev = &qm->pdev->dev;
- struct qm_dma *qdma;
+ struct hisi_qp *qp;
int i;

for (i = num - 1; i >= 0; i--) {
- qdma = &qm->qp_array[i].qdma;
- dma_free_coherent(dev, qdma->size, qdma->va, qdma->dma);
+ qp = &qm->qp_array[i];
+ dma_free_coherent(dev, qp->qdma.size, qp->qdma.va, qp->qdma.dma);
+ kfree(qp->msg);
kfree(qm->poll_data[i].qp_finish_id);
}

@@ -2934,10 +2944,14 @@ static int hisi_qp_memory_init(struct hisi_qm *qm, size_t dma_size, int id,
return -ENOMEM;

qp = &qm->qp_array[id];
+ qp->msg = kmalloc_array(sq_depth, sizeof(void *), GFP_KERNEL);
+ if (!qp->msg)
+ goto err_free_qp_finish_id;
+
qp->qdma.va = dma_alloc_coherent(dev, dma_size, &qp->qdma.dma,
GFP_KERNEL);
if (!qp->qdma.va)
- goto err_free_qp_finish_id;
+ goto err_free_qp_msg;

qp->sqe = qp->qdma.va;
qp->sqe_dma = qp->qdma.dma;
@@ -2949,8 +2963,14 @@ static int hisi_qp_memory_init(struct hisi_qm *qm, size_t dma_size, int id,
qp->qm = qm;
qp->qp_id = id;

+ spin_lock_init(&qp->qp_lock);
+ spin_lock_init(&qp->backlog.lock);
+ INIT_LIST_HEAD(&qp->backlog.list);
+
return 0;

+err_free_qp_msg:
+ kfree(qp->msg);
err_free_qp_finish_id:
kfree(qm->poll_data[id].qp_finish_id);
return ret;
@@ -3496,6 +3516,14 @@ void hisi_qm_dev_err_uninit(struct hisi_qm *qm)
}
EXPORT_SYMBOL_GPL(hisi_qm_dev_err_uninit);

+static void qm_release_qp_nolock(struct hisi_qp *qp)
+{
+ struct hisi_qm *qm = qp->qm;
+
+ qm->qp_in_used--;
+ idr_remove(&qm->qp_idr, qp->qp_id);
+}
+
/**
* hisi_qm_free_qps() - free multiple queue pairs.
* @qps: The queue pairs need to be freed.
@@ -3508,8 +3536,15 @@ void hisi_qm_free_qps(struct hisi_qp **qps, int qp_num)
if (!qps || qp_num <= 0)
return;

- for (i = qp_num - 1; i >= 0; i--)
- hisi_qm_release_qp(qps[i]);
+ down_write(&qps[0]->qm->qps_lock);
+
+ for (i = qp_num - 1; i >= 0; i--) {
+ qm_stop_qp_nolock(qps[i]);
+ qm_release_qp_nolock(qps[i]);
+ }
+
+ up_write(&qps[0]->qm->qps_lock);
+ qm_pm_put_sync(qps[0]->qm);
}
EXPORT_SYMBOL_GPL(hisi_qm_free_qps);

@@ -3523,6 +3558,43 @@ static void free_list(struct list_head *head)
}
}

+static int qm_get_and_start_qp(struct hisi_qm *qm, int qp_num, struct hisi_qp **qps, u8 *alg_type)
+{
+ int i, ret;
+
+ ret = qm_pm_get_sync(qm);
+ if (ret)
+ return ret;
+
+ down_write(&qm->qps_lock);
+ for (i = 0; i < qp_num; i++) {
+ qps[i] = qm_create_qp_nolock(qm, alg_type[i]);
+ if (IS_ERR(qps[i])) {
+ ret = -ENODEV;
+ goto stop_and_free;
+ }
+
+ ret = qm_start_qp_nolock(qps[i], 0);
+ if (ret) {
+ qm_release_qp_nolock(qps[i]);
+ goto stop_and_free;
+ }
+ }
+ up_write(&qm->qps_lock);
+
+ return 0;
+
+stop_and_free:
+ for (i--; i >= 0; i--) {
+ qm_stop_qp_nolock(qps[i]);
+ qm_release_qp_nolock(qps[i]);
+ }
+ up_write(&qm->qps_lock);
+ qm_pm_put_sync(qm);
+
+ return ret;
+}
+
static int hisi_qm_sort_devices(int node, struct list_head *head,
struct hisi_qm_list *qm_list)
{
@@ -3571,12 +3643,11 @@ static int hisi_qm_sort_devices(int node, struct list_head *head,
* not meet the requirements will return error.
*/
int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num,
- u8 alg_type, int node, struct hisi_qp **qps)
+ u8 *alg_type, int node, struct hisi_qp **qps)
{
struct hisi_qm_resource *tmp;
int ret = -ENODEV;
LIST_HEAD(head);
- int i;

if (!qps || !qm_list || qp_num <= 0)
return -EINVAL;
@@ -3588,24 +3659,15 @@ int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num,
}

list_for_each_entry(tmp, &head, list) {
- for (i = 0; i < qp_num; i++) {
- qps[i] = hisi_qm_create_qp(tmp->qm, alg_type);
- if (IS_ERR(qps[i])) {
- hisi_qm_free_qps(qps, i);
- break;
- }
- }
-
- if (i == qp_num) {
- ret = 0;
+ ret = qm_get_and_start_qp(tmp->qm, qp_num, qps, alg_type);
+ if (!ret)
break;
- }
}

mutex_unlock(&qm_list->lock);
if (ret)
- pr_info("Failed to create qps, node[%d], alg[%u], qp[%d]!\n",
- node, alg_type, qp_num);
+ pr_info("Failed to create qps, node[%d], qp[%d]!\n",
+ node, qp_num);

err:
free_list(&head);
diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
index 81d0beda93b2..0710977861f3 100644
--- a/drivers/crypto/hisilicon/sec2/sec.h
+++ b/drivers/crypto/hisilicon/sec2/sec.h
@@ -82,11 +82,6 @@ struct sec_aead_req {
__u8 out_mac_buf[SEC_MAX_MAC_LEN];
};

-struct sec_instance_backlog {
- struct list_head list;
- spinlock_t lock;
-};
-
/* SEC request of Crypto */
struct sec_req {
union {
@@ -112,7 +107,6 @@ struct sec_req {
bool use_pbuf;

struct list_head list;
- struct sec_instance_backlog *backlog;
struct sec_request_buf buf;
};

@@ -172,7 +166,6 @@ struct sec_qp_ctx {
spinlock_t id_lock;
struct hisi_acc_sgl_pool *c_in_pool;
struct hisi_acc_sgl_pool *c_out_pool;
- struct sec_instance_backlog backlog;
u16 send_head;
};

diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index 31590d01139a..c462b58d3034 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -54,7 +54,6 @@
#define SEC_AUTH_CIPHER_V3 0x40
#define SEC_FLAG_OFFSET 7
#define SEC_FLAG_MASK 0x0780
-#define SEC_TYPE_MASK 0x0F
#define SEC_DONE_MASK 0x0001
#define SEC_ICV_MASK 0x000E

@@ -148,7 +147,7 @@ static void sec_free_req_id(struct sec_req *req)
spin_unlock_bh(&qp_ctx->id_lock);
}

-static u8 pre_parse_finished_bd(struct bd_status *status, void *resp)
+static void pre_parse_finished_bd(struct bd_status *status, void *resp)
{
struct sec_sqe *bd = resp;

@@ -158,11 +157,9 @@ static u8 pre_parse_finished_bd(struct bd_status *status, void *resp)
SEC_FLAG_MASK) >> SEC_FLAG_OFFSET;
status->tag = le16_to_cpu(bd->type2.tag);
status->err_type = bd->type2.error_type;
-
- return bd->type_cipher_auth & SEC_TYPE_MASK;
}

-static u8 pre_parse_finished_bd3(struct bd_status *status, void *resp)
+static void pre_parse_finished_bd3(struct bd_status *status, void *resp)
{
struct sec_sqe3 *bd3 = resp;

@@ -172,8 +169,6 @@ static u8 pre_parse_finished_bd3(struct bd_status *status, void *resp)
SEC_FLAG_MASK) >> SEC_FLAG_OFFSET;
status->tag = le64_to_cpu(bd3->tag);
status->err_type = bd3->error_type;
-
- return le32_to_cpu(bd3->bd_param) & SEC_TYPE_MASK;
}

static int sec_cb_status_check(struct sec_req *req,
@@ -244,7 +239,7 @@ static void sec_alg_send_backlog_soft(struct sec_ctx *ctx, struct sec_qp_ctx *qp
struct sec_req *req, *tmp;
int ret;

- list_for_each_entry_safe(req, tmp, &qp_ctx->backlog.list, list) {
+ list_for_each_entry_safe(req, tmp, &qp_ctx->qp->backlog.list, list) {
list_del(&req->list);
ctx->req_op->buf_unmap(ctx, req);
if (req->req_id >= 0)
@@ -265,11 +260,12 @@ static void sec_alg_send_backlog_soft(struct sec_ctx *ctx, struct sec_qp_ctx *qp

static void sec_alg_send_backlog(struct sec_ctx *ctx, struct sec_qp_ctx *qp_ctx)
{
+ struct hisi_qp *qp = qp_ctx->qp;
struct sec_req *req, *tmp;
int ret;

- spin_lock_bh(&qp_ctx->backlog.lock);
- list_for_each_entry_safe(req, tmp, &qp_ctx->backlog.list, list) {
+ spin_lock_bh(&qp->backlog.lock);
+ list_for_each_entry_safe(req, tmp, &qp->backlog.list, list) {
ret = qp_send_message(req);
switch (ret) {
case -EINPROGRESS:
@@ -287,42 +283,46 @@ static void sec_alg_send_backlog(struct sec_ctx *ctx, struct sec_qp_ctx *qp_ctx)
}

unlock:
- spin_unlock_bh(&qp_ctx->backlog.lock);
+ spin_unlock_bh(&qp->backlog.lock);
}

static void sec_req_cb(struct hisi_qp *qp, void *resp)
{
- struct sec_qp_ctx *qp_ctx = qp->qp_ctx;
- struct sec_dfx *dfx = &qp_ctx->ctx->sec->debug.dfx;
- u8 type_supported = qp_ctx->ctx->type_supported;
+ const struct sec_sqe *sqe = qp->msg[qp->qp_status.cq_head];
+ struct sec_req *req = container_of(sqe, struct sec_req, sec_sqe);
+ struct sec_ctx *ctx = req->ctx;
+ struct sec_dfx *dfx = &ctx->sec->debug.dfx;
struct bd_status status;
- struct sec_ctx *ctx;
- struct sec_req *req;
int err;
- u8 type;

- if (type_supported == SEC_BD_TYPE2) {
- type = pre_parse_finished_bd(&status, resp);
- req = qp_ctx->req_list[status.tag];
- } else {
- type = pre_parse_finished_bd3(&status, resp);
- req = (void *)(uintptr_t)status.tag;
- }
+ pre_parse_finished_bd(&status, resp);

- if (unlikely(type != type_supported)) {
- atomic64_inc(&dfx->err_bd_cnt);
- pr_err("err bd type [%u]\n", type);
- return;
- }
+ req->err_type = status.err_type;
+ err = sec_cb_status_check(req, &status);
+ if (err)
+ atomic64_inc(&dfx->done_flag_cnt);

- if (unlikely(!req)) {
- atomic64_inc(&dfx->invalid_req_cnt);
- atomic_inc(&qp->qp_status.used);
- return;
- }
+ atomic64_inc(&dfx->recv_cnt);

+ ctx->req_op->buf_unmap(ctx, req);
+ ctx->req_op->callback(ctx, req, err);
+}
+
+static void sec_req_cb3(struct hisi_qp *qp, void *resp)
+{
+ struct bd_status status;
+ struct sec_ctx *ctx;
+ struct sec_dfx *dfx;
+ struct sec_req *req;
+ int err;
+
+ pre_parse_finished_bd3(&status, resp);
+
+ req = (void *)(uintptr_t)status.tag;
req->err_type = status.err_type;
ctx = req->ctx;
+ dfx = &ctx->sec->debug.dfx;
+
err = sec_cb_status_check(req, &status);
if (err)
atomic64_inc(&dfx->done_flag_cnt);
@@ -330,7 +330,6 @@ static void sec_req_cb(struct hisi_qp *qp, void *resp)
atomic64_inc(&dfx->recv_cnt);

ctx->req_op->buf_unmap(ctx, req);
-
ctx->req_op->callback(ctx, req, err);
}

@@ -348,8 +347,10 @@ static int sec_alg_send_message_retry(struct sec_req *req)

static int sec_alg_try_enqueue(struct sec_req *req)
{
+ struct hisi_qp *qp = req->qp_ctx->qp;
+
/* Check if any request is already backlogged */
- if (!list_empty(&req->backlog->list))
+ if (!list_empty(&qp->backlog.list))
return -EBUSY;

/* Try to enqueue to HW ring */
@@ -359,17 +360,18 @@ static int sec_alg_try_enqueue(struct sec_req *req)

static int sec_alg_send_message_maybacklog(struct sec_req *req)
{
+ struct hisi_qp *qp = req->qp_ctx->qp;
int ret;

ret = sec_alg_try_enqueue(req);
if (ret != -EBUSY)
return ret;

- spin_lock_bh(&req->backlog->lock);
+ spin_lock_bh(&qp->backlog.lock);
ret = sec_alg_try_enqueue(req);
if (ret == -EBUSY)
- list_add_tail(&req->list, &req->backlog->list);
- spin_unlock_bh(&req->backlog->lock);
+ list_add_tail(&req->list, &qp->backlog.list);
+ spin_unlock_bh(&qp->backlog.lock);

return ret;
}
@@ -624,32 +626,25 @@ static int sec_create_qp_ctx(struct sec_ctx *ctx, int qp_ctx_id)

qp_ctx = &ctx->qp_ctx[qp_ctx_id];
qp = ctx->qps[qp_ctx_id];
- qp->req_type = 0;
- qp->qp_ctx = qp_ctx;
qp_ctx->qp = qp;
qp_ctx->ctx = ctx;

- qp->req_cb = sec_req_cb;
+ if (ctx->type_supported == SEC_BD_TYPE3)
+ qp->req_cb = sec_req_cb3;
+ else
+ qp->req_cb = sec_req_cb;

spin_lock_init(&qp_ctx->req_lock);
idr_init(&qp_ctx->req_idr);
- spin_lock_init(&qp_ctx->backlog.lock);
spin_lock_init(&qp_ctx->id_lock);
- INIT_LIST_HEAD(&qp_ctx->backlog.list);
qp_ctx->send_head = 0;

ret = sec_alloc_qp_ctx_resource(ctx, qp_ctx);
if (ret)
goto err_destroy_idr;

- ret = hisi_qm_start_qp(qp, 0);
- if (ret < 0)
- goto err_resource_free;
-
return 0;

-err_resource_free:
- sec_free_qp_ctx_resource(ctx, qp_ctx);
err_destroy_idr:
idr_destroy(&qp_ctx->req_idr);
return ret;
@@ -658,7 +653,6 @@ static int sec_create_qp_ctx(struct sec_ctx *ctx, int qp_ctx_id)
static void sec_release_qp_ctx(struct sec_ctx *ctx,
struct sec_qp_ctx *qp_ctx)
{
- hisi_qm_stop_qp(qp_ctx->qp);
sec_free_qp_ctx_resource(ctx, qp_ctx);
idr_destroy(&qp_ctx->req_idr);
}
@@ -669,10 +663,8 @@ static int sec_ctx_base_init(struct sec_ctx *ctx)
int i, ret;

ctx->qps = sec_create_qps();
- if (!ctx->qps) {
- pr_err("Can not create sec qps!\n");
+ if (!ctx->qps)
return -ENODEV;
- }

sec = container_of(ctx->qps[0]->qm, struct sec_dev, qm);
ctx->sec = sec;
@@ -708,6 +700,9 @@ static void sec_ctx_base_uninit(struct sec_ctx *ctx)
{
int i;

+ if (!ctx->qps)
+ return;
+
for (i = 0; i < ctx->sec->ctx_q_num; i++)
sec_release_qp_ctx(ctx, &ctx->qp_ctx[i]);

@@ -719,6 +714,9 @@ static int sec_cipher_init(struct sec_ctx *ctx)
{
struct sec_cipher_ctx *c_ctx = &ctx->c_ctx;

+ if (!ctx->qps)
+ return 0;
+
c_ctx->c_key = dma_alloc_coherent(ctx->dev, SEC_MAX_KEY_SIZE,
&c_ctx->c_key_dma, GFP_KERNEL);
if (!c_ctx->c_key)
@@ -731,6 +729,9 @@ static void sec_cipher_uninit(struct sec_ctx *ctx)
{
struct sec_cipher_ctx *c_ctx = &ctx->c_ctx;

+ if (!ctx->qps)
+ return;
+
memzero_explicit(c_ctx->c_key, SEC_MAX_KEY_SIZE);
dma_free_coherent(ctx->dev, SEC_MAX_KEY_SIZE,
c_ctx->c_key, c_ctx->c_key_dma);
@@ -752,6 +753,9 @@ static void sec_auth_uninit(struct sec_ctx *ctx)
{
struct sec_auth_ctx *a_ctx = &ctx->a_ctx;

+ if (!ctx->qps)
+ return;
+
memzero_explicit(a_ctx->a_key, SEC_MAX_AKEY_SIZE);
dma_free_coherent(ctx->dev, SEC_MAX_AKEY_SIZE,
a_ctx->a_key, a_ctx->a_key_dma);
@@ -789,7 +793,7 @@ static int sec_skcipher_init(struct crypto_skcipher *tfm)
}

ret = sec_ctx_base_init(ctx);
- if (ret)
+ if (ret && ret != -ENODEV)
return ret;

ret = sec_cipher_init(ctx);
@@ -898,6 +902,9 @@ static int sec_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
struct device *dev = ctx->dev;
int ret;

+ if (!ctx->qps)
+ goto set_soft_key;
+
if (c_mode == SEC_CMODE_XTS) {
ret = xts_verify_key(tfm, key, keylen);
if (ret) {
@@ -928,13 +935,14 @@ static int sec_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
}

memcpy(c_ctx->c_key, key, keylen);
- if (c_ctx->fbtfm) {
- ret = crypto_sync_skcipher_setkey(c_ctx->fbtfm, key, keylen);
- if (ret) {
- dev_err(dev, "failed to set fallback skcipher key!\n");
- return ret;
- }
+
+set_soft_key:
+ ret = crypto_sync_skcipher_setkey(c_ctx->fbtfm, key, keylen);
+ if (ret) {
+ dev_err(dev, "failed to set fallback skcipher key!\n");
+ return ret;
}
+
return 0;
}

@@ -1398,6 +1406,9 @@ static int sec_aead_setkey(struct crypto_aead *tfm, const u8 *key,
struct crypto_authenc_keys keys;
int ret;

+ if (!ctx->qps)
+ return sec_aead_fallback_setkey(a_ctx, tfm, key, keylen);
+
ctx->a_ctx.a_alg = a_alg;
ctx->c_ctx.c_alg = c_alg;
c_ctx->c_mode = c_mode;
@@ -1952,7 +1963,6 @@ static int sec_request_init(struct sec_ctx *ctx, struct sec_req *req)
} while (req->req_id < 0 && ++i < ctx->sec->ctx_q_num);

req->qp_ctx = qp_ctx;
- req->backlog = &qp_ctx->backlog;

return 0;
}
@@ -2055,6 +2065,9 @@ static int sec_skcipher_ctx_init(struct crypto_skcipher *tfm)
if (ret)
return ret;

+ if (!ctx->qps)
+ return 0;
+
if (ctx->sec->qm.ver < QM_HW_V3) {
ctx->type_supported = SEC_BD_TYPE2;
ctx->req_op = &sec_skcipher_req_ops;
@@ -2063,7 +2076,7 @@ static int sec_skcipher_ctx_init(struct crypto_skcipher *tfm)
ctx->req_op = &sec_skcipher_req_ops_v3;
}

- return ret;
+ return 0;
}

static void sec_skcipher_ctx_exit(struct crypto_skcipher *tfm)
@@ -2131,7 +2144,7 @@ static int sec_aead_ctx_init(struct crypto_aead *tfm, const char *hash_name)
int ret;

ret = sec_aead_init(tfm);
- if (ret) {
+ if (ret && ret != -ENODEV) {
pr_err("hisi_sec2: aead init error!\n");
return ret;
}
@@ -2173,7 +2186,7 @@ static int sec_aead_xcm_ctx_init(struct crypto_aead *tfm)
int ret;

ret = sec_aead_init(tfm);
- if (ret) {
+ if (ret && ret != -ENODEV) {
dev_err(ctx->dev, "hisi_sec2: aead xcm init error!\n");
return ret;
}
@@ -2318,6 +2331,9 @@ static int sec_skcipher_crypto(struct skcipher_request *sk_req, bool encrypt)
bool need_fallback = false;
int ret;

+ if (!ctx->qps)
+ goto soft_crypto;
+
if (!sk_req->cryptlen) {
if (ctx->c_ctx.c_mode == SEC_CMODE_XTS)
return -EINVAL;
@@ -2335,9 +2351,12 @@ static int sec_skcipher_crypto(struct skcipher_request *sk_req, bool encrypt)
return -EINVAL;

if (unlikely(ctx->c_ctx.fallback || need_fallback))
- return sec_skcipher_soft_crypto(ctx, sk_req, encrypt);
+ goto soft_crypto;

return ctx->req_op->process(ctx, req);
+
+soft_crypto:
+ return sec_skcipher_soft_crypto(ctx, sk_req, encrypt);
}

static int sec_skcipher_encrypt(struct skcipher_request *sk_req)
@@ -2545,6 +2564,9 @@ static int sec_aead_crypto(struct aead_request *a_req, bool encrypt)
bool need_fallback = false;
int ret;

+ if (!ctx->qps)
+ goto soft_crypto;
+
req->flag = a_req->base.flags;
req->aead_req.aead_req = a_req;
req->c_req.encrypt = encrypt;
@@ -2555,11 +2577,14 @@ static int sec_aead_crypto(struct aead_request *a_req, bool encrypt)
ret = sec_aead_param_check(ctx, req, &need_fallback);
if (unlikely(ret)) {
if (need_fallback)
- return sec_aead_soft_crypto(ctx, a_req, encrypt);
+ goto soft_crypto;
return -EINVAL;
}

return ctx->req_op->process(ctx, req);
+
+soft_crypto:
+ return sec_aead_soft_crypto(ctx, a_req, encrypt);
}

static int sec_aead_encrypt(struct aead_request *a_req)
diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index 5eb2d6820742..7dd125f5f511 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -417,18 +417,29 @@ struct hisi_qp **sec_create_qps(void)
int node = cpu_to_node(raw_smp_processor_id());
u32 ctx_num = ctx_q_num;
struct hisi_qp **qps;
+ u8 *type;
int ret;

qps = kcalloc(ctx_num, sizeof(struct hisi_qp *), GFP_KERNEL);
if (!qps)
return NULL;

- ret = hisi_qm_alloc_qps_node(&sec_devices, ctx_num, 0, node, qps);
- if (!ret)
- return qps;
+ /* The type of SEC is all 0, so just allocated by kcalloc */
+ type = kcalloc(ctx_num, sizeof(u8), GFP_KERNEL);
+ if (!type) {
+ kfree(qps);
+ return NULL;
+ }

- kfree(qps);
- return NULL;
+ ret = hisi_qm_alloc_qps_node(&sec_devices, ctx_num, type, node, qps);
+ if (ret) {
+ kfree(type);
+ kfree(qps);
+ return NULL;
+ }
+
+ kfree(type);
+ return qps;
}

u64 sec_get_alg_bitmap(struct hisi_qm *qm, u32 high, u32 low)
diff --git a/drivers/crypto/hisilicon/sgl.c b/drivers/crypto/hisilicon/sgl.c
index 7a9ef2a9972a..848ad7b101d9 100644
--- a/drivers/crypto/hisilicon/sgl.c
+++ b/drivers/crypto/hisilicon/sgl.c
@@ -265,7 +265,7 @@ hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev, struct scatterlist *sgl,
return curr_hw_sgl;

err_unmap:
- dma_unmap_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL);
+ dma_unmap_sg(dev, sgl, sg_n, dir);

return ERR_PTR(ret);
}
diff --git a/drivers/crypto/hisilicon/trng/trng.c b/drivers/crypto/hisilicon/trng/trng.c
index ac74df4a9471..5ca0b90859a8 100644
--- a/drivers/crypto/hisilicon/trng/trng.c
+++ b/drivers/crypto/hisilicon/trng/trng.c
@@ -40,6 +40,7 @@
#define SEED_SHIFT_24 24
#define SEED_SHIFT_16 16
#define SEED_SHIFT_8 8
+#define SW_MAX_RANDOM_BYTES 65520

struct hisi_trng_list {
struct mutex lock;
@@ -53,8 +54,10 @@ struct hisi_trng {
struct list_head list;
struct hwrng rng;
u32 ver;
- bool is_used;
- struct mutex mutex;
+ u32 ctx_num;
+ /* The bytes of the random number generated since the last seeding. */
+ u32 random_bytes;
+ struct mutex lock;
};

struct hisi_trng_ctx {
@@ -63,10 +66,14 @@ struct hisi_trng_ctx {

static atomic_t trng_active_devs;
static struct hisi_trng_list trng_devices;
+static int hisi_trng_read(struct hwrng *rng, void *buf, size_t max, bool wait);

-static void hisi_trng_set_seed(struct hisi_trng *trng, const u8 *seed)
+static int hisi_trng_set_seed(struct hisi_trng *trng, const u8 *seed)
{
u32 val, seed_reg, i;
+ int ret;
+
+ writel(0x0, trng->base + SW_DRBG_BLOCKS);

for (i = 0; i < SW_DRBG_SEED_SIZE;
i += SW_DRBG_SEED_SIZE / SW_DRBG_SEED_REGS_NUM) {
@@ -78,6 +85,20 @@ static void hisi_trng_set_seed(struct hisi_trng *trng, const u8 *seed)
seed_reg = (i >> SW_DRBG_NUM_SHIFT) % SW_DRBG_SEED_REGS_NUM;
writel(val, trng->base + SW_DRBG_SEED(seed_reg));
}
+
+ writel(SW_DRBG_BLOCKS_NUM | (0x1 << SW_DRBG_ENABLE_SHIFT),
+ trng->base + SW_DRBG_BLOCKS);
+ writel(0x1, trng->base + SW_DRBG_INIT);
+ ret = readl_relaxed_poll_timeout(trng->base + SW_DRBG_STATUS,
+ val, val & BIT(0), SLEEP_US, TIMEOUT_US);
+ if (ret) {
+ pr_err("failed to init trng(%d)\n", ret);
+ return -EIO;
+ }
+
+ trng->random_bytes = 0;
+
+ return 0;
}

static int hisi_trng_seed(struct crypto_rng *tfm, const u8 *seed,
@@ -85,8 +106,7 @@ static int hisi_trng_seed(struct crypto_rng *tfm, const u8 *seed,
{
struct hisi_trng_ctx *ctx = crypto_rng_ctx(tfm);
struct hisi_trng *trng = ctx->trng;
- u32 val = 0;
- int ret = 0;
+ int ret;

if (slen < SW_DRBG_SEED_SIZE) {
pr_err("slen(%u) is not matched with trng(%d)\n", slen,
@@ -94,43 +114,45 @@ static int hisi_trng_seed(struct crypto_rng *tfm, const u8 *seed,
return -EINVAL;
}

- writel(0x0, trng->base + SW_DRBG_BLOCKS);
- hisi_trng_set_seed(trng, seed);
+ mutex_lock(&trng->lock);
+ ret = hisi_trng_set_seed(trng, seed);
+ mutex_unlock(&trng->lock);

- writel(SW_DRBG_BLOCKS_NUM | (0x1 << SW_DRBG_ENABLE_SHIFT),
- trng->base + SW_DRBG_BLOCKS);
- writel(0x1, trng->base + SW_DRBG_INIT);
+ return ret;
+}

- ret = readl_relaxed_poll_timeout(trng->base + SW_DRBG_STATUS,
- val, val & BIT(0), SLEEP_US, TIMEOUT_US);
- if (ret)
- pr_err("fail to init trng(%d)\n", ret);
+static int hisi_trng_reseed(struct hisi_trng *trng)
+{
+ u8 seed[SW_DRBG_SEED_SIZE];
+ int size;

- return ret;
+ if (!trng->random_bytes)
+ return 0;
+
+ size = hisi_trng_read(&trng->rng, seed, SW_DRBG_SEED_SIZE, false);
+ if (size != SW_DRBG_SEED_SIZE)
+ return -EIO;
+
+ return hisi_trng_set_seed(trng, seed);
}

-static int hisi_trng_generate(struct crypto_rng *tfm, const u8 *src,
- unsigned int slen, u8 *dstn, unsigned int dlen)
+static int hisi_trng_get_bytes(struct hisi_trng *trng, u8 *dstn, unsigned int dlen)
{
- struct hisi_trng_ctx *ctx = crypto_rng_ctx(tfm);
- struct hisi_trng *trng = ctx->trng;
u32 data[SW_DRBG_DATA_NUM];
u32 currsize = 0;
u32 val = 0;
int ret;
u32 i;

- if (dlen > SW_DRBG_BLOCKS_NUM * SW_DRBG_BYTES || dlen == 0) {
- pr_err("dlen(%u) exceeds limit(%d)!\n", dlen,
- SW_DRBG_BLOCKS_NUM * SW_DRBG_BYTES);
- return -EINVAL;
- }
+ ret = hisi_trng_reseed(trng);
+ if (ret)
+ return ret;

do {
ret = readl_relaxed_poll_timeout(trng->base + SW_DRBG_STATUS,
- val, val & BIT(1), SLEEP_US, TIMEOUT_US);
+ val, val & BIT(1), SLEEP_US, TIMEOUT_US);
if (ret) {
- pr_err("fail to generate random number(%d)!\n", ret);
+ pr_err("failed to generate random number(%d)!\n", ret);
break;
}

@@ -145,30 +167,57 @@ static int hisi_trng_generate(struct crypto_rng *tfm, const u8 *src,
currsize = dlen;
}

+ trng->random_bytes += SW_DRBG_BYTES;
writel(0x1, trng->base + SW_DRBG_GEN);
} while (currsize < dlen);

return ret;
}

+static int hisi_trng_generate(struct crypto_rng *tfm, const u8 *src,
+ unsigned int slen, u8 *dstn, unsigned int dlen)
+{
+ struct hisi_trng_ctx *ctx = crypto_rng_ctx(tfm);
+ struct hisi_trng *trng = ctx->trng;
+ unsigned int currsize = 0;
+ unsigned int block_size;
+ int ret;
+
+ if (!dstn || !dlen) {
+ pr_err("output is error, dlen %u!\n", dlen);
+ return -EINVAL;
+ }
+
+ do {
+ block_size = min_t(unsigned int, dlen - currsize, SW_MAX_RANDOM_BYTES);
+ mutex_lock(&trng->lock);
+ ret = hisi_trng_get_bytes(trng, dstn + currsize, block_size);
+ mutex_unlock(&trng->lock);
+ if (ret)
+ return ret;
+ currsize += block_size;
+ } while (currsize < dlen);
+
+ return 0;
+}
+
static int hisi_trng_init(struct crypto_tfm *tfm)
{
struct hisi_trng_ctx *ctx = crypto_tfm_ctx(tfm);
struct hisi_trng *trng;
- int ret = -EBUSY;
+ u32 ctx_num = ~0;

mutex_lock(&trng_devices.lock);
list_for_each_entry(trng, &trng_devices.list, list) {
- if (!trng->is_used) {
- trng->is_used = true;
+ if (trng->ctx_num < ctx_num) {
+ ctx_num = trng->ctx_num;
ctx->trng = trng;
- ret = 0;
- break;
}
}
+ ctx->trng->ctx_num++;
mutex_unlock(&trng_devices.lock);

- return ret;
+ return 0;
}

static void hisi_trng_exit(struct crypto_tfm *tfm)
@@ -176,7 +225,7 @@ static void hisi_trng_exit(struct crypto_tfm *tfm)
struct hisi_trng_ctx *ctx = crypto_tfm_ctx(tfm);

mutex_lock(&trng_devices.lock);
- ctx->trng->is_used = false;
+ ctx->trng->ctx_num--;
mutex_unlock(&trng_devices.lock);
}

@@ -238,7 +287,7 @@ static int hisi_trng_del_from_list(struct hisi_trng *trng)
int ret = -EBUSY;

mutex_lock(&trng_devices.lock);
- if (!trng->is_used) {
+ if (!trng->ctx_num) {
list_del(&trng->list);
ret = 0;
}
@@ -262,7 +311,9 @@ static int hisi_trng_probe(struct platform_device *pdev)
if (IS_ERR(trng->base))
return PTR_ERR(trng->base);

- trng->is_used = false;
+ trng->ctx_num = 0;
+ trng->random_bytes = SW_MAX_RANDOM_BYTES;
+ mutex_init(&trng->lock);
trng->ver = readl(trng->base + HISI_TRNG_VERSION);
if (!trng_devices.is_init) {
INIT_LIST_HEAD(&trng_devices.list);
diff --git a/drivers/crypto/hisilicon/zip/zip.h b/drivers/crypto/hisilicon/zip/zip.h
index 9fb2a9c01132..b83f228281ab 100644
--- a/drivers/crypto/hisilicon/zip/zip.h
+++ b/drivers/crypto/hisilicon/zip/zip.h
@@ -99,7 +99,7 @@ enum zip_cap_table_type {
ZIP_CORE5_BITMAP,
};

-int zip_create_qps(struct hisi_qp **qps, int qp_num, int node);
+int zip_create_qps(struct hisi_qp **qps, int qp_num, int node, u8 *alg_type);
int hisi_zip_register_to_crypto(struct hisi_qm *qm);
void hisi_zip_unregister_from_crypto(struct hisi_qm *qm);
bool hisi_zip_alg_support(struct hisi_qm *qm, u32 alg);
diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c b/drivers/crypto/hisilicon/zip/zip_crypto.c
index b97513981a3b..e140d4f8afe0 100644
--- a/drivers/crypto/hisilicon/zip/zip_crypto.c
+++ b/drivers/crypto/hisilicon/zip/zip_crypto.c
@@ -39,6 +39,7 @@ enum {
HZIP_CTX_Q_NUM
};

+#define GET_REQ_FROM_SQE(sqe) ((u64)(sqe)->dw26 | (u64)(sqe)->dw27 << 32)
#define COMP_NAME_TO_TYPE(alg_name) \
(!strcmp((alg_name), "deflate") ? HZIP_ALG_TYPE_DEFLATE : 0)

@@ -48,6 +49,7 @@ struct hisi_zip_req {
struct hisi_acc_hw_sgl *hw_dst;
dma_addr_t dma_src;
dma_addr_t dma_dst;
+ struct hisi_zip_qp_ctx *qp_ctx;
u16 req_id;
};

@@ -64,6 +66,7 @@ struct hisi_zip_qp_ctx {
struct hisi_acc_sgl_pool *sgl_pool;
struct hisi_zip *zip_dev;
struct hisi_zip_ctx *ctx;
+ u8 req_type;
};

struct hisi_zip_sqe_ops {
@@ -74,7 +77,6 @@ struct hisi_zip_sqe_ops {
void (*fill_req_type)(struct hisi_zip_sqe *sqe, u8 req_type);
void (*fill_tag)(struct hisi_zip_sqe *sqe, struct hisi_zip_req *req);
void (*fill_sqe_type)(struct hisi_zip_sqe *sqe, u8 sqe_type);
- u32 (*get_tag)(struct hisi_zip_sqe *sqe);
u32 (*get_status)(struct hisi_zip_sqe *sqe);
u32 (*get_dstlen)(struct hisi_zip_sqe *sqe);
};
@@ -82,6 +84,7 @@ struct hisi_zip_sqe_ops {
struct hisi_zip_ctx {
struct hisi_zip_qp_ctx qp_ctx[HZIP_CTX_Q_NUM];
const struct hisi_zip_sqe_ops *ops;
+ bool fallback;
};

static int sgl_sge_nr_set(const char *val, const struct kernel_param *kp)
@@ -108,6 +111,24 @@ static u16 sgl_sge_nr = HZIP_SGL_SGE_NR;
module_param_cb(sgl_sge_nr, &sgl_sge_nr_ops, &sgl_sge_nr, 0444);
MODULE_PARM_DESC(sgl_sge_nr, "Number of sge in sgl(1-255)");

+static int hisi_zip_fallback_do_work(struct acomp_req *acomp_req, bool is_decompress)
+{
+ ACOMP_FBREQ_ON_STACK(fbreq, acomp_req);
+ int ret;
+
+ if (!is_decompress)
+ ret = crypto_acomp_compress(fbreq);
+ else
+ ret = crypto_acomp_decompress(fbreq);
+ if (ret) {
+ pr_err("failed to do fallback work, ret=%d\n", ret);
+ return ret;
+ }
+
+ acomp_req->dlen = fbreq->dlen;
+ return ret;
+}
+
static struct hisi_zip_req *hisi_zip_create_req(struct hisi_zip_qp_ctx *qp_ctx,
struct acomp_req *req)
{
@@ -131,6 +152,7 @@ static struct hisi_zip_req *hisi_zip_create_req(struct hisi_zip_qp_ctx *qp_ctx,
req_cache = q + req_id;
req_cache->req_id = req_id;
req_cache->req = req;
+ req_cache->qp_ctx = qp_ctx;

return req_cache;
}
@@ -181,7 +203,8 @@ static void hisi_zip_fill_req_type(struct hisi_zip_sqe *sqe, u8 req_type)

static void hisi_zip_fill_tag(struct hisi_zip_sqe *sqe, struct hisi_zip_req *req)
{
- sqe->dw26 = req->req_id;
+ sqe->dw26 = lower_32_bits((u64)req);
+ sqe->dw27 = upper_32_bits((u64)req);
}

static void hisi_zip_fill_sqe_type(struct hisi_zip_sqe *sqe, u8 sqe_type)
@@ -213,7 +236,6 @@ static int hisi_zip_do_work(struct hisi_zip_qp_ctx *qp_ctx,
{
struct hisi_acc_sgl_pool *pool = qp_ctx->sgl_pool;
struct hisi_zip_dfx *dfx = &qp_ctx->zip_dev->dfx;
- struct hisi_zip_req_q *req_q = &qp_ctx->req_q;
struct acomp_req *a_req = req->req;
struct hisi_qp *qp = qp_ctx->qp;
struct device *dev = &qp->qm->pdev->dev;
@@ -237,18 +259,16 @@ static int hisi_zip_do_work(struct hisi_zip_qp_ctx *qp_ctx,
&req->dma_dst, DMA_FROM_DEVICE);
if (IS_ERR(req->hw_dst)) {
ret = PTR_ERR(req->hw_dst);
- dev_err(dev, "failed to map the dst buffer to hw slg (%d)!\n",
+ dev_err(dev, "failed to map the dst buffer to hw sgl (%d)!\n",
ret);
goto err_unmap_input;
}

- hisi_zip_fill_sqe(qp_ctx->ctx, &zip_sqe, qp->req_type, req);
+ hisi_zip_fill_sqe(qp_ctx->ctx, &zip_sqe, qp_ctx->req_type, req);

/* send command to start a task */
atomic64_inc(&dfx->send_cnt);
- spin_lock_bh(&req_q->req_lock);
ret = hisi_qp_send(qp, &zip_sqe);
- spin_unlock_bh(&req_q->req_lock);
if (unlikely(ret < 0)) {
atomic64_inc(&dfx->send_busy_cnt);
ret = -EAGAIN;
@@ -265,11 +285,6 @@ static int hisi_zip_do_work(struct hisi_zip_qp_ctx *qp_ctx,
return ret;
}

-static u32 hisi_zip_get_tag(struct hisi_zip_sqe *sqe)
-{
- return sqe->dw26;
-}
-
static u32 hisi_zip_get_status(struct hisi_zip_sqe *sqe)
{
return sqe->dw3 & HZIP_BD_STATUS_M;
@@ -282,14 +297,12 @@ static u32 hisi_zip_get_dstlen(struct hisi_zip_sqe *sqe)

static void hisi_zip_acomp_cb(struct hisi_qp *qp, void *data)
{
- struct hisi_zip_qp_ctx *qp_ctx = qp->qp_ctx;
+ struct hisi_zip_sqe *sqe = data;
+ struct hisi_zip_req *req = (struct hisi_zip_req *)GET_REQ_FROM_SQE(sqe);
+ struct hisi_zip_qp_ctx *qp_ctx = req->qp_ctx;
const struct hisi_zip_sqe_ops *ops = qp_ctx->ctx->ops;
struct hisi_zip_dfx *dfx = &qp_ctx->zip_dev->dfx;
- struct hisi_zip_req_q *req_q = &qp_ctx->req_q;
struct device *dev = &qp->qm->pdev->dev;
- struct hisi_zip_sqe *sqe = data;
- u32 tag = ops->get_tag(sqe);
- struct hisi_zip_req *req = req_q->q + tag;
struct acomp_req *acomp_req = req->req;
int err = 0;
u32 status;
@@ -319,10 +332,15 @@ static int hisi_zip_acompress(struct acomp_req *acomp_req)
{
struct hisi_zip_ctx *ctx = crypto_tfm_ctx(acomp_req->base.tfm);
struct hisi_zip_qp_ctx *qp_ctx = &ctx->qp_ctx[HZIP_QPC_COMP];
- struct device *dev = &qp_ctx->qp->qm->pdev->dev;
struct hisi_zip_req *req;
+ struct device *dev;
int ret;

+ if (ctx->fallback)
+ return hisi_zip_fallback_do_work(acomp_req, 0);
+
+ dev = &qp_ctx->qp->qm->pdev->dev;
+
req = hisi_zip_create_req(qp_ctx, acomp_req);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -340,10 +358,15 @@ static int hisi_zip_adecompress(struct acomp_req *acomp_req)
{
struct hisi_zip_ctx *ctx = crypto_tfm_ctx(acomp_req->base.tfm);
struct hisi_zip_qp_ctx *qp_ctx = &ctx->qp_ctx[HZIP_QPC_DECOMP];
- struct device *dev = &qp_ctx->qp->qm->pdev->dev;
struct hisi_zip_req *req;
+ struct device *dev;
int ret;

+ if (ctx->fallback)
+ return hisi_zip_fallback_do_work(acomp_req, 1);
+
+ dev = &qp_ctx->qp->qm->pdev->dev;
+
req = hisi_zip_create_req(qp_ctx, acomp_req);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -358,33 +381,6 @@ static int hisi_zip_adecompress(struct acomp_req *acomp_req)
return ret;
}

-static int hisi_zip_start_qp(struct hisi_qp *qp, struct hisi_zip_qp_ctx *qp_ctx,
- int alg_type, int req_type)
-{
- struct device *dev = &qp->qm->pdev->dev;
- int ret;
-
- qp->req_type = req_type;
- qp->alg_type = alg_type;
- qp->qp_ctx = qp_ctx;
-
- ret = hisi_qm_start_qp(qp, 0);
- if (ret < 0) {
- dev_err(dev, "failed to start qp (%d)!\n", ret);
- return ret;
- }
-
- qp_ctx->qp = qp;
-
- return 0;
-}
-
-static void hisi_zip_release_qp(struct hisi_zip_qp_ctx *qp_ctx)
-{
- hisi_qm_stop_qp(qp_ctx->qp);
- hisi_qm_free_qps(&qp_ctx->qp, 1);
-}
-
static const struct hisi_zip_sqe_ops hisi_zip_ops = {
.sqe_type = 0x3,
.fill_addr = hisi_zip_fill_addr,
@@ -393,7 +389,6 @@ static const struct hisi_zip_sqe_ops hisi_zip_ops = {
.fill_req_type = hisi_zip_fill_req_type,
.fill_tag = hisi_zip_fill_tag,
.fill_sqe_type = hisi_zip_fill_sqe_type,
- .get_tag = hisi_zip_get_tag,
.get_status = hisi_zip_get_status,
.get_dstlen = hisi_zip_get_dstlen,
};
@@ -402,10 +397,15 @@ static int hisi_zip_ctx_init(struct hisi_zip_ctx *hisi_zip_ctx, u8 req_type, int
{
struct hisi_qp *qps[HZIP_CTX_Q_NUM] = { NULL };
struct hisi_zip_qp_ctx *qp_ctx;
+ u8 alg_type[HZIP_CTX_Q_NUM];
struct hisi_zip *hisi_zip;
- int ret, i, j;
+ int ret, i;

- ret = zip_create_qps(qps, HZIP_CTX_Q_NUM, node);
+ /* alg_type = 0 for compress, 1 for decompress in hw sqe */
+ for (i = 0; i < HZIP_CTX_Q_NUM; i++)
+ alg_type[i] = i;
+
+ ret = zip_create_qps(qps, HZIP_CTX_Q_NUM, node, alg_type);
if (ret) {
pr_err("failed to create zip qps (%d)!\n", ret);
return -ENODEV;
@@ -414,19 +414,11 @@ static int hisi_zip_ctx_init(struct hisi_zip_ctx *hisi_zip_ctx, u8 req_type, int
hisi_zip = container_of(qps[0]->qm, struct hisi_zip, qm);

for (i = 0; i < HZIP_CTX_Q_NUM; i++) {
- /* alg_type = 0 for compress, 1 for decompress in hw sqe */
qp_ctx = &hisi_zip_ctx->qp_ctx[i];
qp_ctx->ctx = hisi_zip_ctx;
- ret = hisi_zip_start_qp(qps[i], qp_ctx, i, req_type);
- if (ret) {
- for (j = i - 1; j >= 0; j--)
- hisi_qm_stop_qp(hisi_zip_ctx->qp_ctx[j].qp);
-
- hisi_qm_free_qps(qps, HZIP_CTX_Q_NUM);
- return ret;
- }
-
qp_ctx->zip_dev = hisi_zip;
+ qp_ctx->req_type = req_type;
+ qp_ctx->qp = qps[i];
}

hisi_zip_ctx->ops = &hisi_zip_ops;
@@ -436,10 +428,13 @@ static int hisi_zip_ctx_init(struct hisi_zip_ctx *hisi_zip_ctx, u8 req_type, int

static void hisi_zip_ctx_exit(struct hisi_zip_ctx *hisi_zip_ctx)
{
+ struct hisi_qp *qps[HZIP_CTX_Q_NUM] = { NULL };
int i;

for (i = 0; i < HZIP_CTX_Q_NUM; i++)
- hisi_zip_release_qp(&hisi_zip_ctx->qp_ctx[i]);
+ qps[i] = hisi_zip_ctx->qp_ctx[i].qp;
+
+ hisi_qm_free_qps(qps, HZIP_CTX_Q_NUM);
}

static int hisi_zip_create_req_q(struct hisi_zip_ctx *ctx)
@@ -549,7 +544,7 @@ static int hisi_zip_acomp_init(struct crypto_acomp *tfm)
ret = hisi_zip_ctx_init(ctx, COMP_NAME_TO_TYPE(alg_name), tfm->base.node);
if (ret) {
pr_err("failed to init ctx (%d)!\n", ret);
- return ret;
+ goto switch_to_soft;
}

dev = &ctx->qp_ctx[0].qp->qm->pdev->dev;
@@ -574,17 +569,20 @@ static int hisi_zip_acomp_init(struct crypto_acomp *tfm)
hisi_zip_release_req_q(ctx);
err_ctx_exit:
hisi_zip_ctx_exit(ctx);
- return ret;
+switch_to_soft:
+ ctx->fallback = true;
+ return 0;
}

static void hisi_zip_acomp_exit(struct crypto_acomp *tfm)
{
struct hisi_zip_ctx *ctx = crypto_tfm_ctx(&tfm->base);

- hisi_zip_set_acomp_cb(ctx, NULL);
- hisi_zip_release_sgl_pool(ctx);
- hisi_zip_release_req_q(ctx);
- hisi_zip_ctx_exit(ctx);
+ if (!ctx->fallback) {
+ hisi_zip_release_sgl_pool(ctx);
+ hisi_zip_release_req_q(ctx);
+ hisi_zip_ctx_exit(ctx);
+ }
}

static struct acomp_alg hisi_zip_acomp_deflate = {
@@ -595,7 +593,8 @@ static struct acomp_alg hisi_zip_acomp_deflate = {
.base = {
.cra_name = "deflate",
.cra_driver_name = "hisi-deflate-acomp",
- .cra_flags = CRYPTO_ALG_ASYNC,
+ .cra_flags = CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_NEED_FALLBACK,
.cra_module = THIS_MODULE,
.cra_priority = HZIP_ALG_PRIORITY,
.cra_ctxsize = sizeof(struct hisi_zip_ctx),
diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c
index 4fcbe6bada06..85b26ef17548 100644
--- a/drivers/crypto/hisilicon/zip/zip_main.c
+++ b/drivers/crypto/hisilicon/zip/zip_main.c
@@ -446,12 +446,12 @@ static const struct pci_device_id hisi_zip_dev_ids[] = {
};
MODULE_DEVICE_TABLE(pci, hisi_zip_dev_ids);

-int zip_create_qps(struct hisi_qp **qps, int qp_num, int node)
+int zip_create_qps(struct hisi_qp **qps, int qp_num, int node, u8 *alg_type)
{
if (node == NUMA_NO_NODE)
node = cpu_to_node(raw_smp_processor_id());

- return hisi_qm_alloc_qps_node(&zip_devices, qp_num, 0, node, qps);
+ return hisi_qm_alloc_qps_node(&zip_devices, qp_num, alg_type, node, qps);
}

bool hisi_zip_alg_support(struct hisi_qm *qm, u32 alg)
diff --git a/drivers/crypto/inside-secure/eip93/eip93-main.c b/drivers/crypto/inside-secure/eip93/eip93-main.c
index 0b38a567da0e..b7fd9795062d 100644
--- a/drivers/crypto/inside-secure/eip93/eip93-main.c
+++ b/drivers/crypto/inside-secure/eip93/eip93-main.c
@@ -77,11 +77,44 @@ inline void eip93_irq_clear(struct eip93_device *eip93, u32 mask)
__raw_writel(mask, eip93->base + EIP93_REG_INT_CLR);
}

-static void eip93_unregister_algs(unsigned int i)
+static int eip93_algo_is_supported(u32 alg_flags, u32 supported_algo_flags)
+{
+ if ((IS_DES(alg_flags) || IS_3DES(alg_flags)) &&
+ !(supported_algo_flags & EIP93_PE_OPTION_TDES))
+ return 0;
+
+ if (IS_AES(alg_flags) &&
+ !(supported_algo_flags & EIP93_PE_OPTION_AES))
+ return 0;
+
+ if (IS_HASH_MD5(alg_flags) &&
+ !(supported_algo_flags & EIP93_PE_OPTION_MD5))
+ return 0;
+
+ if (IS_HASH_SHA1(alg_flags) &&
+ !(supported_algo_flags & EIP93_PE_OPTION_SHA_1))
+ return 0;
+
+ if (IS_HASH_SHA224(alg_flags) &&
+ !(supported_algo_flags & EIP93_PE_OPTION_SHA_224))
+ return 0;
+
+ if (IS_HASH_SHA256(alg_flags) &&
+ !(supported_algo_flags & EIP93_PE_OPTION_SHA_256))
+ return 0;
+
+ return 1;
+}
+
+static void eip93_unregister_algs(u32 supported_algo_flags, unsigned int i)
{
unsigned int j;

for (j = 0; j < i; j++) {
+ if (!eip93_algo_is_supported(eip93_algs[j]->flags,
+ supported_algo_flags))
+ continue;
+
switch (eip93_algs[j]->type) {
case EIP93_ALG_TYPE_SKCIPHER:
crypto_unregister_skcipher(&eip93_algs[j]->alg.skcipher);
@@ -90,7 +123,7 @@ static void eip93_unregister_algs(unsigned int i)
crypto_unregister_aead(&eip93_algs[j]->alg.aead);
break;
case EIP93_ALG_TYPE_HASH:
- crypto_unregister_ahash(&eip93_algs[i]->alg.ahash);
+ crypto_unregister_ahash(&eip93_algs[j]->alg.ahash);
break;
}
}
@@ -106,49 +139,27 @@ static int eip93_register_algs(struct eip93_device *eip93, u32 supported_algo_fl

eip93_algs[i]->eip93 = eip93;

- if ((IS_DES(alg_flags) || IS_3DES(alg_flags)) &&
- !(supported_algo_flags & EIP93_PE_OPTION_TDES))
+ if (!eip93_algo_is_supported(alg_flags, supported_algo_flags))
continue;

- if (IS_AES(alg_flags)) {
- if (!(supported_algo_flags & EIP93_PE_OPTION_AES))
- continue;
+ if (IS_AES(alg_flags) && !IS_HMAC(alg_flags)) {
+ if (supported_algo_flags & EIP93_PE_OPTION_AES_KEY128)
+ eip93_algs[i]->alg.skcipher.max_keysize =
+ AES_KEYSIZE_128;

- if (!IS_HMAC(alg_flags)) {
- if (supported_algo_flags & EIP93_PE_OPTION_AES_KEY128)
- eip93_algs[i]->alg.skcipher.max_keysize =
- AES_KEYSIZE_128;
+ if (supported_algo_flags & EIP93_PE_OPTION_AES_KEY192)
+ eip93_algs[i]->alg.skcipher.max_keysize =
+ AES_KEYSIZE_192;

- if (supported_algo_flags & EIP93_PE_OPTION_AES_KEY192)
- eip93_algs[i]->alg.skcipher.max_keysize =
- AES_KEYSIZE_192;
+ if (supported_algo_flags & EIP93_PE_OPTION_AES_KEY256)
+ eip93_algs[i]->alg.skcipher.max_keysize =
+ AES_KEYSIZE_256;

- if (supported_algo_flags & EIP93_PE_OPTION_AES_KEY256)
- eip93_algs[i]->alg.skcipher.max_keysize =
- AES_KEYSIZE_256;
-
- if (IS_RFC3686(alg_flags))
- eip93_algs[i]->alg.skcipher.max_keysize +=
- CTR_RFC3686_NONCE_SIZE;
- }
+ if (IS_RFC3686(alg_flags))
+ eip93_algs[i]->alg.skcipher.max_keysize +=
+ CTR_RFC3686_NONCE_SIZE;
}

- if (IS_HASH_MD5(alg_flags) &&
- !(supported_algo_flags & EIP93_PE_OPTION_MD5))
- continue;
-
- if (IS_HASH_SHA1(alg_flags) &&
- !(supported_algo_flags & EIP93_PE_OPTION_SHA_1))
- continue;
-
- if (IS_HASH_SHA224(alg_flags) &&
- !(supported_algo_flags & EIP93_PE_OPTION_SHA_224))
- continue;
-
- if (IS_HASH_SHA256(alg_flags) &&
- !(supported_algo_flags & EIP93_PE_OPTION_SHA_256))
- continue;
-
switch (eip93_algs[i]->type) {
case EIP93_ALG_TYPE_SKCIPHER:
ret = crypto_register_skcipher(&eip93_algs[i]->alg.skcipher);
@@ -167,7 +178,7 @@ static int eip93_register_algs(struct eip93_device *eip93, u32 supported_algo_fl
return 0;

fail:
- eip93_unregister_algs(i);
+ eip93_unregister_algs(supported_algo_flags, i);

return ret;
}
@@ -469,8 +480,11 @@ static int eip93_crypto_probe(struct platform_device *pdev)
static void eip93_crypto_remove(struct platform_device *pdev)
{
struct eip93_device *eip93 = platform_get_drvdata(pdev);
+ u32 algo_flags;
+
+ algo_flags = readl(eip93->base + EIP93_REG_PE_OPTION_1);

- eip93_unregister_algs(ARRAY_SIZE(eip93_algs));
+ eip93_unregister_algs(algo_flags, ARRAY_SIZE(eip93_algs));
eip93_cleanup(eip93);
}

diff --git a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c
index b9b5e744a3f1..af8dbc7517cf 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c
@@ -148,6 +148,16 @@ static struct pfvf_message handle_blkmsg_req(struct adf_accel_vf_info *vf_info,
blk_byte = FIELD_GET(ADF_VF2PF_SMALL_BLOCK_BYTE_MASK, req.data);
byte_max = ADF_VF2PF_SMALL_BLOCK_BYTE_MAX;
break;
+ default:
+ dev_err(&GET_DEV(vf_info->accel_dev),
+ "Invalid BlockMsg type 0x%.4x received from VF%u\n",
+ req.type, vf_info->vf_nr);
+ resp.type = ADF_PF2VF_MSGTYPE_BLKMSG_RESP;
+ resp.data = FIELD_PREP(ADF_PF2VF_BLKMSG_RESP_TYPE_MASK,
+ ADF_PF2VF_BLKMSG_RESP_TYPE_ERROR) |
+ FIELD_PREP(ADF_PF2VF_BLKMSG_RESP_DATA_MASK,
+ ADF_PF2VF_UNSPECIFIED_ERROR);
+ return resp;
}

/* Is this a request for CRC or data? */
diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_main.c b/drivers/crypto/marvell/octeontx/otx_cptvf_main.c
index 88a41d1ca5f6..6c0bfb3ea1c9 100644
--- a/drivers/crypto/marvell/octeontx/otx_cptvf_main.c
+++ b/drivers/crypto/marvell/octeontx/otx_cptvf_main.c
@@ -168,7 +168,8 @@ static void free_command_queues(struct otx_cptvf *cptvf,
chunk = list_first_entry(&cqinfo->queue[i].chead,
struct otx_cpt_cmd_chunk, nextchunk);

- dma_free_coherent(&pdev->dev, chunk->size,
+ dma_free_coherent(&pdev->dev,
+ chunk->size + OTX_CPT_NEXT_CHUNK_PTR_SIZE,
chunk->head,
chunk->dma_addr);
chunk->head = NULL;
diff --git a/drivers/crypto/starfive/jh7110-aes.c b/drivers/crypto/starfive/jh7110-aes.c
index 426b24889af8..01195664cc7c 100644
--- a/drivers/crypto/starfive/jh7110-aes.c
+++ b/drivers/crypto/starfive/jh7110-aes.c
@@ -669,8 +669,10 @@ static int starfive_aes_aead_do_one_req(struct crypto_engine *engine, void *areq
return -ENOMEM;

if (sg_copy_to_buffer(req->src, sg_nents_for_len(req->src, cryp->assoclen),
- rctx->adata, cryp->assoclen) != cryp->assoclen)
+ rctx->adata, cryp->assoclen) != cryp->assoclen) {
+ kfree(rctx->adata);
return -EINVAL;
+ }
}

if (cryp->total_in)
@@ -681,8 +683,11 @@ static int starfive_aes_aead_do_one_req(struct crypto_engine *engine, void *areq
ctx->rctx = rctx;

ret = starfive_aes_hw_init(ctx);
- if (ret)
+ if (ret) {
+ if (cryp->assoclen)
+ kfree(rctx->adata);
return ret;
+ }

if (!cryp->assoclen)
goto write_text;
diff --git a/drivers/cxl/core/edac.c b/drivers/cxl/core/edac.c
index 79994ca9bc9f..81160260e26b 100644
--- a/drivers/cxl/core/edac.c
+++ b/drivers/cxl/core/edac.c
@@ -1988,6 +1988,40 @@ static int cxl_memdev_soft_ppr_init(struct cxl_memdev *cxlmd,
return 0;
}

+static void err_rec_free(void *_cxlmd)
+{
+ struct cxl_memdev *cxlmd = _cxlmd;
+ struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
+ struct cxl_event_gen_media *rec_gen_media;
+ struct cxl_event_dram *rec_dram;
+ unsigned long index;
+
+ cxlmd->err_rec_array = NULL;
+ xa_for_each(&array_rec->rec_dram, index, rec_dram)
+ kfree(rec_dram);
+ xa_destroy(&array_rec->rec_dram);
+
+ xa_for_each(&array_rec->rec_gen_media, index, rec_gen_media)
+ kfree(rec_gen_media);
+ xa_destroy(&array_rec->rec_gen_media);
+ kfree(array_rec);
+}
+
+static int devm_cxl_memdev_setup_err_rec(struct cxl_memdev *cxlmd)
+{
+ struct cxl_mem_err_rec *array_rec =
+ kzalloc(sizeof(*array_rec), GFP_KERNEL);
+
+ if (!array_rec)
+ return -ENOMEM;
+
+ xa_init(&array_rec->rec_gen_media);
+ xa_init(&array_rec->rec_dram);
+ cxlmd->err_rec_array = array_rec;
+
+ return devm_add_action_or_reset(&cxlmd->dev, err_rec_free, cxlmd);
+}
+
int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd)
{
struct edac_dev_feature ras_features[CXL_NR_EDAC_DEV_FEATURES];
@@ -2038,15 +2072,9 @@ int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd)
}

if (repair_inst) {
- struct cxl_mem_err_rec *array_rec =
- devm_kzalloc(&cxlmd->dev, sizeof(*array_rec),
- GFP_KERNEL);
- if (!array_rec)
- return -ENOMEM;
-
- xa_init(&array_rec->rec_gen_media);
- xa_init(&array_rec->rec_dram);
- cxlmd->err_rec_array = array_rec;
+ rc = devm_cxl_memdev_setup_err_rec(cxlmd);
+ if (rc)
+ return rc;
}
}

@@ -2088,22 +2116,4 @@ int devm_cxl_region_edac_register(struct cxl_region *cxlr)
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_region_edac_register, "CXL");

-void devm_cxl_memdev_edac_release(struct cxl_memdev *cxlmd)
-{
- struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
- struct cxl_event_gen_media *rec_gen_media;
- struct cxl_event_dram *rec_dram;
- unsigned long index;
-
- if (!IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR) || !array_rec)
- return;
-
- xa_for_each(&array_rec->rec_dram, index, rec_dram)
- kfree(rec_dram);
- xa_destroy(&array_rec->rec_dram);

- xa_for_each(&array_rec->rec_gen_media, index, rec_gen_media)
- kfree(rec_gen_media);
- xa_destroy(&array_rec->rec_gen_media);
-}
-EXPORT_SYMBOL_NS_GPL(devm_cxl_memdev_edac_release, "CXL");
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index 20dd63810806..13dafac7c6d5 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -844,14 +844,13 @@ static int cxl_decoder_commit(struct cxl_decoder *cxld)
scoped_guard(rwsem_read, &cxl_rwsem.dpa)
setup_hw_decoder(cxld, hdm);

- port->commit_end++;
rc = cxld_await_commit(hdm, cxld->id);
if (rc) {
dev_dbg(&port->dev, "%s: error %d committing decoder\n",
dev_name(&cxld->dev), rc);
- cxld->reset(cxld);
return rc;
}
+ port->commit_end++;
cxld->flags |= CXL_DECODER_F_ENABLE;

return 0;
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index e370d733e440..4dff7f44d908 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -27,7 +27,6 @@ static void cxl_memdev_release(struct device *dev)
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);

ida_free(&cxl_memdev_ida, cxlmd->id);
- devm_cxl_memdev_edac_release(cxlmd);
kfree(cxlmd);
}

diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 804e4a48540f..85131872d7f6 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -823,16 +823,18 @@ DEFINE_DEBUGFS_ATTRIBUTE(cxl_einj_inject_fops, NULL, cxl_einj_inject,

static void cxl_debugfs_create_dport_dir(struct cxl_dport *dport)
{
+ struct cxl_port *parent = parent_port_of(dport->port);
struct dentry *dir;

if (!einj_cxl_is_initialized())
return;

/*
- * dport_dev needs to be a PCIe port for CXL 2.0+ ports because
- * EINJ expects a dport SBDF to be specified for 2.0 error injection.
+ * Protocol error injection is only available for CXL 2.0+ root ports
+ * and CXL 1.1 downstream ports
*/
- if (!dport->rch && !dev_is_pci(dport->dport_dev))
+ if (!dport->rch &&
+ !(dev_is_pci(dport->dport_dev) && parent && is_cxl_root(parent)))
return;

dir = cxl_debugfs_create_dir(dev_name(dport->dport_dev));
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 434031a0c1f7..c12ab4fc9512 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -63,7 +63,7 @@ struct cxl_memdev {
int depth;
u8 scrub_cycle;
int scrub_region_id;
- void *err_rec_array;
+ struct cxl_mem_err_rec *err_rec_array;
};

static inline struct cxl_memdev *to_cxl_memdev(struct device *dev)
@@ -877,7 +877,6 @@ int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd);
int devm_cxl_region_edac_register(struct cxl_region *cxlr);
int cxl_store_rec_gen_media(struct cxl_memdev *cxlmd, union cxl_event *evt);
int cxl_store_rec_dram(struct cxl_memdev *cxlmd, union cxl_event *evt);
-void devm_cxl_memdev_edac_release(struct cxl_memdev *cxlmd);
#else
static inline int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd)
{ return 0; }
@@ -889,8 +888,6 @@ static inline int cxl_store_rec_gen_media(struct cxl_memdev *cxlmd,
static inline int cxl_store_rec_dram(struct cxl_memdev *cxlmd,
union cxl_event *evt)
{ return 0; }
-static inline void devm_cxl_memdev_edac_release(struct cxl_memdev *cxlmd)
-{ return; }
#endif

#ifdef CONFIG_CXL_SUSPEND
diff --git a/drivers/dma/dma-axi-dmac.c b/drivers/dma/dma-axi-dmac.c
index 5b06b0dc67ee..0f25f6d8ae71 100644
--- a/drivers/dma/dma-axi-dmac.c
+++ b/drivers/dma/dma-axi-dmac.c
@@ -233,11 +233,9 @@ static void axi_dmac_start_transfer(struct axi_dmac_chan *chan)
unsigned int flags = 0;
unsigned int val;

- if (!chan->hw_sg) {
- val = axi_dmac_read(dmac, AXI_DMAC_REG_START_TRANSFER);
- if (val) /* Queue is full, wait for the next SOT IRQ */
- return;
- }
+ val = axi_dmac_read(dmac, AXI_DMAC_REG_START_TRANSFER);
+ if (val) /* Queue is full, wait for the next SOT IRQ */
+ return;

desc = chan->next_desc;

@@ -247,6 +245,7 @@ static void axi_dmac_start_transfer(struct axi_dmac_chan *chan)
return;
list_move_tail(&vdesc->node, &chan->active_descs);
desc = to_axi_dmac_desc(vdesc);
+ chan->next_desc = desc;
}
sg = &desc->sg[desc->num_submitted];

@@ -265,8 +264,6 @@ static void axi_dmac_start_transfer(struct axi_dmac_chan *chan)
else
chan->next_desc = NULL;
flags |= AXI_DMAC_FLAG_LAST;
- } else {
- chan->next_desc = desc;
}

sg->hw->id = axi_dmac_read(dmac, AXI_DMAC_REG_TRANSFER_ID);
diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c
index 97583c7d51a2..093185768ad8 100644
--- a/drivers/dma/fsl-edma-main.c
+++ b/drivers/dma/fsl-edma-main.c
@@ -915,7 +915,6 @@ static void fsl_edma_remove(struct platform_device *pdev)
of_dma_controller_free(np);
dma_async_device_unregister(&fsl_edma->dma_dev);
fsl_edma_cleanup_vchan(&fsl_edma->dma_dev);
- fsl_disable_clocks(fsl_edma, fsl_edma->drvdata->dmamuxs);
}

static int fsl_edma_suspend_late(struct device *dev)
diff --git a/drivers/dma/mediatek/mtk-uart-apdma.c b/drivers/dma/mediatek/mtk-uart-apdma.c
index 08e15177427b..96c18c815f1d 100644
--- a/drivers/dma/mediatek/mtk-uart-apdma.c
+++ b/drivers/dma/mediatek/mtk-uart-apdma.c
@@ -41,7 +41,7 @@
#define VFF_STOP_CLR_B 0
#define VFF_EN_CLR_B 0
#define VFF_INT_EN_CLR_B 0
-#define VFF_4G_SUPPORT_CLR_B 0
+#define VFF_ADDR2_CLR_B 0

/*
* interrupt trigger level for tx
@@ -72,7 +72,7 @@
/* TX: the buffer size SW can write. RX: the buffer size HW can write. */
#define VFF_LEFT_SIZE 0x40
#define VFF_DEBUG_STATUS 0x50
-#define VFF_4G_SUPPORT 0x54
+#define VFF_ADDR2 0x54

struct mtk_uart_apdmadev {
struct dma_device ddev;
@@ -149,7 +149,7 @@ static void mtk_uart_apdma_start_tx(struct mtk_chan *c)
mtk_uart_apdma_write(c, VFF_INT_FLAG, VFF_TX_INT_CLR_B);

if (mtkd->support_33bits)
- mtk_uart_apdma_write(c, VFF_4G_SUPPORT, VFF_4G_EN_B);
+ mtk_uart_apdma_write(c, VFF_ADDR2, upper_32_bits(d->addr));
}

mtk_uart_apdma_write(c, VFF_EN, VFF_EN_B);
@@ -192,7 +192,7 @@ static void mtk_uart_apdma_start_rx(struct mtk_chan *c)
mtk_uart_apdma_write(c, VFF_INT_FLAG, VFF_RX_INT_CLR_B);

if (mtkd->support_33bits)
- mtk_uart_apdma_write(c, VFF_4G_SUPPORT, VFF_4G_EN_B);
+ mtk_uart_apdma_write(c, VFF_ADDR2, upper_32_bits(d->addr));
}

mtk_uart_apdma_write(c, VFF_INT_EN, VFF_RX_INT_EN_B);
@@ -298,7 +298,7 @@ static int mtk_uart_apdma_alloc_chan_resources(struct dma_chan *chan)
}

if (mtkd->support_33bits)
- mtk_uart_apdma_write(c, VFF_4G_SUPPORT, VFF_4G_SUPPORT_CLR_B);
+ mtk_uart_apdma_write(c, VFF_ADDR2, VFF_ADDR2_CLR_B);

err_pm:
pm_runtime_put_noidle(mtkd->ddev.dev);
diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c
index a4153bcb6dcf..64944f601ee5 100644
--- a/drivers/dpll/dpll_netlink.c
+++ b/drivers/dpll/dpll_netlink.c
@@ -637,6 +637,10 @@ dpll_cmd_pin_get_one(struct sk_buff *msg, struct dpll_pin *pin,
ret = dpll_msg_add_pin_freq(msg, pin, ref, extack);
if (ret)
return ret;
+ if (prop->phase_gran &&
+ nla_put_u32(msg, DPLL_A_PIN_PHASE_ADJUST_GRAN,
+ prop->phase_gran))
+ return -EMSGSIZE;
if (nla_put_s32(msg, DPLL_A_PIN_PHASE_ADJUST_MIN,
prop->phase_range.min))
return -EMSGSIZE;
@@ -1261,7 +1265,13 @@ dpll_pin_phase_adj_set(struct dpll_pin *pin, struct nlattr *phase_adj_attr,
if (phase_adj > pin->prop.phase_range.max ||
phase_adj < pin->prop.phase_range.min) {
NL_SET_ERR_MSG_ATTR(extack, phase_adj_attr,
- "phase adjust value not supported");
+ "phase adjust value of out range");
+ return -EINVAL;
+ }
+ if (pin->prop.phase_gran && phase_adj % (s32)pin->prop.phase_gran) {
+ NL_SET_ERR_MSG_ATTR_FMT(extack, phase_adj_attr,
+ "phase adjust value not multiple of %u",
+ pin->prop.phase_gran);
return -EINVAL;
}

diff --git a/drivers/dpll/zl3073x/Makefile b/drivers/dpll/zl3073x/Makefile
index 84e22aae57e5..bd324c7fe710 100644
--- a/drivers/dpll/zl3073x/Makefile
+++ b/drivers/dpll/zl3073x/Makefile
@@ -1,7 +1,8 @@
# SPDX-License-Identifier: GPL-2.0

obj-$(CONFIG_ZL3073X) += zl3073x.o
-zl3073x-objs := core.o devlink.o dpll.o flash.o fw.o prop.o
+zl3073x-objs := core.o devlink.o dpll.o flash.o fw.o \
+ out.o prop.o ref.o synth.o

obj-$(CONFIG_ZL3073X_I2C) += zl3073x_i2c.o
zl3073x_i2c-objs := i2c.o
diff --git a/drivers/dpll/zl3073x/core.c b/drivers/dpll/zl3073x/core.c
index e42e527813cf..2f340f7eb9ec 100644
--- a/drivers/dpll/zl3073x/core.c
+++ b/drivers/dpll/zl3073x/core.c
@@ -129,47 +129,6 @@ const struct regmap_config zl3073x_regmap_config = {
};
EXPORT_SYMBOL_NS_GPL(zl3073x_regmap_config, "ZL3073X");

-/**
- * zl3073x_ref_freq_factorize - factorize given frequency
- * @freq: input frequency
- * @base: base frequency
- * @mult: multiplier
- *
- * Checks if the given frequency can be factorized using one of the
- * supported base frequencies. If so the base frequency and multiplier
- * are stored into appropriate parameters if they are not NULL.
- *
- * Return: 0 on success, -EINVAL if the frequency cannot be factorized
- */
-int
-zl3073x_ref_freq_factorize(u32 freq, u16 *base, u16 *mult)
-{
- static const u16 base_freqs[] = {
- 1, 2, 4, 5, 8, 10, 16, 20, 25, 32, 40, 50, 64, 80, 100, 125,
- 128, 160, 200, 250, 256, 320, 400, 500, 625, 640, 800, 1000,
- 1250, 1280, 1600, 2000, 2500, 3125, 3200, 4000, 5000, 6250,
- 6400, 8000, 10000, 12500, 15625, 16000, 20000, 25000, 31250,
- 32000, 40000, 50000, 62500,
- };
- u32 div;
- int i;
-
- for (i = 0; i < ARRAY_SIZE(base_freqs); i++) {
- div = freq / base_freqs[i];
-
- if (div <= U16_MAX && (freq % base_freqs[i]) == 0) {
- if (base)
- *base = base_freqs[i];
- if (mult)
- *mult = div;
-
- return 0;
- }
- }
-
- return -EINVAL;
-}
-
static bool
zl3073x_check_reg(struct zl3073x_dev *zldev, unsigned int reg, size_t size)
{
@@ -593,190 +552,6 @@ int zl3073x_write_hwreg_seq(struct zl3073x_dev *zldev,
return rc;
}

-/**
- * zl3073x_ref_state_fetch - get input reference state
- * @zldev: pointer to zl3073x_dev structure
- * @index: input reference index to fetch state for
- *
- * Function fetches information for the given input reference that are
- * invariant and stores them for later use.
- *
- * Return: 0 on success, <0 on error
- */
-static int
-zl3073x_ref_state_fetch(struct zl3073x_dev *zldev, u8 index)
-{
- struct zl3073x_ref *input = &zldev->ref[index];
- u8 ref_config;
- int rc;
-
- /* If the input is differential then the configuration for N-pin
- * reference is ignored and P-pin config is used for both.
- */
- if (zl3073x_is_n_pin(index) &&
- zl3073x_ref_is_diff(zldev, index - 1)) {
- input->enabled = zl3073x_ref_is_enabled(zldev, index - 1);
- input->diff = true;
-
- return 0;
- }
-
- guard(mutex)(&zldev->multiop_lock);
-
- /* Read reference configuration */
- rc = zl3073x_mb_op(zldev, ZL_REG_REF_MB_SEM, ZL_REF_MB_SEM_RD,
- ZL_REG_REF_MB_MASK, BIT(index));
- if (rc)
- return rc;
-
- /* Read ref_config register */
- rc = zl3073x_read_u8(zldev, ZL_REG_REF_CONFIG, &ref_config);
- if (rc)
- return rc;
-
- input->enabled = FIELD_GET(ZL_REF_CONFIG_ENABLE, ref_config);
- input->diff = FIELD_GET(ZL_REF_CONFIG_DIFF_EN, ref_config);
-
- dev_dbg(zldev->dev, "REF%u is %s and configured as %s\n", index,
- str_enabled_disabled(input->enabled),
- input->diff ? "differential" : "single-ended");
-
- return rc;
-}
-
-/**
- * zl3073x_out_state_fetch - get output state
- * @zldev: pointer to zl3073x_dev structure
- * @index: output index to fetch state for
- *
- * Function fetches information for the given output (not output pin)
- * that are invariant and stores them for later use.
- *
- * Return: 0 on success, <0 on error
- */
-static int
-zl3073x_out_state_fetch(struct zl3073x_dev *zldev, u8 index)
-{
- struct zl3073x_out *out = &zldev->out[index];
- u8 output_ctrl, output_mode;
- int rc;
-
- /* Read output configuration */
- rc = zl3073x_read_u8(zldev, ZL_REG_OUTPUT_CTRL(index), &output_ctrl);
- if (rc)
- return rc;
-
- /* Store info about output enablement and synthesizer the output
- * is connected to.
- */
- out->enabled = FIELD_GET(ZL_OUTPUT_CTRL_EN, output_ctrl);
- out->synth = FIELD_GET(ZL_OUTPUT_CTRL_SYNTH_SEL, output_ctrl);
-
- dev_dbg(zldev->dev, "OUT%u is %s and connected to SYNTH%u\n", index,
- str_enabled_disabled(out->enabled), out->synth);
-
- guard(mutex)(&zldev->multiop_lock);
-
- /* Read output configuration */
- rc = zl3073x_mb_op(zldev, ZL_REG_OUTPUT_MB_SEM, ZL_OUTPUT_MB_SEM_RD,
- ZL_REG_OUTPUT_MB_MASK, BIT(index));
- if (rc)
- return rc;
-
- /* Read output_mode */
- rc = zl3073x_read_u8(zldev, ZL_REG_OUTPUT_MODE, &output_mode);
- if (rc)
- return rc;
-
- /* Extract and store output signal format */
- out->signal_format = FIELD_GET(ZL_OUTPUT_MODE_SIGNAL_FORMAT,
- output_mode);
-
- dev_dbg(zldev->dev, "OUT%u has signal format 0x%02x\n", index,
- out->signal_format);
-
- return rc;
-}
-
-/**
- * zl3073x_synth_state_fetch - get synth state
- * @zldev: pointer to zl3073x_dev structure
- * @index: synth index to fetch state for
- *
- * Function fetches information for the given synthesizer that are
- * invariant and stores them for later use.
- *
- * Return: 0 on success, <0 on error
- */
-static int
-zl3073x_synth_state_fetch(struct zl3073x_dev *zldev, u8 index)
-{
- struct zl3073x_synth *synth = &zldev->synth[index];
- u16 base, m, n;
- u8 synth_ctrl;
- u32 mult;
- int rc;
-
- /* Read synth control register */
- rc = zl3073x_read_u8(zldev, ZL_REG_SYNTH_CTRL(index), &synth_ctrl);
- if (rc)
- return rc;
-
- /* Store info about synth enablement and DPLL channel the synth is
- * driven by.
- */
- synth->enabled = FIELD_GET(ZL_SYNTH_CTRL_EN, synth_ctrl);
- synth->dpll = FIELD_GET(ZL_SYNTH_CTRL_DPLL_SEL, synth_ctrl);
-
- dev_dbg(zldev->dev, "SYNTH%u is %s and driven by DPLL%u\n", index,
- str_enabled_disabled(synth->enabled), synth->dpll);
-
- guard(mutex)(&zldev->multiop_lock);
-
- /* Read synth configuration */
- rc = zl3073x_mb_op(zldev, ZL_REG_SYNTH_MB_SEM, ZL_SYNTH_MB_SEM_RD,
- ZL_REG_SYNTH_MB_MASK, BIT(index));
- if (rc)
- return rc;
-
- /* The output frequency is determined by the following formula:
- * base * multiplier * numerator / denominator
- *
- * Read registers with these values
- */
- rc = zl3073x_read_u16(zldev, ZL_REG_SYNTH_FREQ_BASE, &base);
- if (rc)
- return rc;
-
- rc = zl3073x_read_u32(zldev, ZL_REG_SYNTH_FREQ_MULT, &mult);
- if (rc)
- return rc;
-
- rc = zl3073x_read_u16(zldev, ZL_REG_SYNTH_FREQ_M, &m);
- if (rc)
- return rc;
-
- rc = zl3073x_read_u16(zldev, ZL_REG_SYNTH_FREQ_N, &n);
- if (rc)
- return rc;
-
- /* Check denominator for zero to avoid div by 0 */
- if (!n) {
- dev_err(zldev->dev,
- "Zero divisor for SYNTH%u retrieved from device\n",
- index);
- return -EINVAL;
- }
-
- /* Compute and store synth frequency */
- zldev->synth[index].freq = div_u64(mul_u32_u32(base * m, mult), n);
-
- dev_dbg(zldev->dev, "SYNTH%u frequency: %u Hz\n", index,
- zldev->synth[index].freq);
-
- return rc;
-}
-
static int
zl3073x_dev_state_fetch(struct zl3073x_dev *zldev)
{
diff --git a/drivers/dpll/zl3073x/core.h b/drivers/dpll/zl3073x/core.h
index 1dca4ddcf235..fe779fc77dd0 100644
--- a/drivers/dpll/zl3073x/core.h
+++ b/drivers/dpll/zl3073x/core.h
@@ -9,7 +9,10 @@
#include <linux/mutex.h>
#include <linux/types.h>

+#include "out.h"
+#include "ref.h"
#include "regs.h"
+#include "synth.h"

struct device;
struct regmap;
@@ -27,42 +30,6 @@ struct zl3073x_dpll;
#define ZL3073X_NUM_PINS (ZL3073X_NUM_INPUT_PINS + \
ZL3073X_NUM_OUTPUT_PINS)

-/**
- * struct zl3073x_ref - input reference invariant info
- * @enabled: input reference is enabled or disabled
- * @diff: true if input reference is differential
- * @ffo: current fractional frequency offset
- */
-struct zl3073x_ref {
- bool enabled;
- bool diff;
- s64 ffo;
-};
-
-/**
- * struct zl3073x_out - output invariant info
- * @enabled: out is enabled or disabled
- * @synth: synthesizer the out is connected to
- * @signal_format: out signal format
- */
-struct zl3073x_out {
- bool enabled;
- u8 synth;
- u8 signal_format;
-};
-
-/**
- * struct zl3073x_synth - synthesizer invariant info
- * @freq: synthesizer frequency
- * @dpll: ID of DPLL the synthesizer is driven by
- * @enabled: synth is enabled or disabled
- */
-struct zl3073x_synth {
- u32 freq;
- u8 dpll;
- bool enabled;
-};
-
/**
* struct zl3073x_dev - zl3073x device
* @dev: pointer to device
@@ -175,7 +142,6 @@ int zl3073x_write_hwreg_seq(struct zl3073x_dev *zldev,
* Misc operations
*****************/

-int zl3073x_ref_freq_factorize(u32 freq, u16 *base, u16 *mult);
int zl3073x_ref_phase_offsets_update(struct zl3073x_dev *zldev, int channel);

static inline bool
@@ -217,172 +183,188 @@ zl3073x_output_pin_out_get(u8 id)
}

/**
- * zl3073x_ref_ffo_get - get current fractional frequency offset
+ * zl3073x_dev_ref_ffo_get - get current fractional frequency offset
* @zldev: pointer to zl3073x device
* @index: input reference index
*
* Return: the latest measured fractional frequency offset
*/
static inline s64
-zl3073x_ref_ffo_get(struct zl3073x_dev *zldev, u8 index)
+zl3073x_dev_ref_ffo_get(struct zl3073x_dev *zldev, u8 index)
{
- return zldev->ref[index].ffo;
+ const struct zl3073x_ref *ref = zl3073x_ref_state_get(zldev, index);
+
+ return zl3073x_ref_ffo_get(ref);
}

/**
- * zl3073x_ref_is_diff - check if the given input reference is differential
+ * zl3073x_dev_ref_is_diff - check if the given input reference is differential
* @zldev: pointer to zl3073x device
* @index: input reference index
*
* Return: true if reference is differential, false if reference is single-ended
*/
static inline bool
-zl3073x_ref_is_diff(struct zl3073x_dev *zldev, u8 index)
+zl3073x_dev_ref_is_diff(struct zl3073x_dev *zldev, u8 index)
{
- return zldev->ref[index].diff;
+ const struct zl3073x_ref *ref = zl3073x_ref_state_get(zldev, index);
+
+ return zl3073x_ref_is_diff(ref);
}

/**
- * zl3073x_ref_is_enabled - check if the given input reference is enabled
+ * zl3073x_dev_ref_is_enabled - check if the given input reference is enabled
* @zldev: pointer to zl3073x device
* @index: input reference index
*
 * Return: true if input reference is enabled, false otherwise
*/
static inline bool
-zl3073x_ref_is_enabled(struct zl3073x_dev *zldev, u8 index)
+zl3073x_dev_ref_is_enabled(struct zl3073x_dev *zldev, u8 index)
{
- return zldev->ref[index].enabled;
+ const struct zl3073x_ref *ref = zl3073x_ref_state_get(zldev, index);
+
+ return zl3073x_ref_is_enabled(ref);
}

/**
- * zl3073x_synth_dpll_get - get DPLL ID the synth is driven by
+ * zl3073x_dev_synth_dpll_get - get DPLL ID the synth is driven by
* @zldev: pointer to zl3073x device
* @index: synth index
*
 * Return: ID of DPLL the given synthesizer is driven by
*/
static inline u8
-zl3073x_synth_dpll_get(struct zl3073x_dev *zldev, u8 index)
+zl3073x_dev_synth_dpll_get(struct zl3073x_dev *zldev, u8 index)
{
- return zldev->synth[index].dpll;
+ const struct zl3073x_synth *synth;
+
+ synth = zl3073x_synth_state_get(zldev, index);
+ return zl3073x_synth_dpll_get(synth);
}

/**
- * zl3073x_synth_freq_get - get synth current freq
+ * zl3073x_dev_synth_freq_get - get synth current freq
* @zldev: pointer to zl3073x device
* @index: synth index
*
 * Return: frequency of given synthesizer
*/
static inline u32
-zl3073x_synth_freq_get(struct zl3073x_dev *zldev, u8 index)
+zl3073x_dev_synth_freq_get(struct zl3073x_dev *zldev, u8 index)
{
- return zldev->synth[index].freq;
+ const struct zl3073x_synth *synth;
+
+ synth = zl3073x_synth_state_get(zldev, index);
+ return zl3073x_synth_freq_get(synth);
}

/**
- * zl3073x_synth_is_enabled - check if the given synth is enabled
+ * zl3073x_dev_synth_is_enabled - check if the given synth is enabled
* @zldev: pointer to zl3073x device
* @index: synth index
*
* Return: true if synth is enabled, false otherwise
*/
static inline bool
-zl3073x_synth_is_enabled(struct zl3073x_dev *zldev, u8 index)
+zl3073x_dev_synth_is_enabled(struct zl3073x_dev *zldev, u8 index)
{
- return zldev->synth[index].enabled;
+ const struct zl3073x_synth *synth;
+
+ synth = zl3073x_synth_state_get(zldev, index);
+ return zl3073x_synth_is_enabled(synth);
}

/**
- * zl3073x_out_synth_get - get synth connected to given output
+ * zl3073x_dev_out_synth_get - get synth connected to given output
* @zldev: pointer to zl3073x device
* @index: output index
*
* Return: index of synth connected to given output.
*/
static inline u8
-zl3073x_out_synth_get(struct zl3073x_dev *zldev, u8 index)
+zl3073x_dev_out_synth_get(struct zl3073x_dev *zldev, u8 index)
{
- return zldev->out[index].synth;
+ const struct zl3073x_out *out = zl3073x_out_state_get(zldev, index);
+
+ return zl3073x_out_synth_get(out);
}

/**
- * zl3073x_out_is_enabled - check if the given output is enabled
+ * zl3073x_dev_out_is_enabled - check if the given output is enabled
* @zldev: pointer to zl3073x device
* @index: output index
*
* Return: true if the output is enabled, false otherwise
*/
static inline bool
-zl3073x_out_is_enabled(struct zl3073x_dev *zldev, u8 index)
+zl3073x_dev_out_is_enabled(struct zl3073x_dev *zldev, u8 index)
{
- u8 synth;
+ const struct zl3073x_out *out = zl3073x_out_state_get(zldev, index);
+ const struct zl3073x_synth *synth;
+ u8 synth_id;

/* Output is enabled only if associated synth is enabled */
- synth = zl3073x_out_synth_get(zldev, index);
- if (zl3073x_synth_is_enabled(zldev, synth))
- return zldev->out[index].enabled;
+ synth_id = zl3073x_out_synth_get(out);
+ synth = zl3073x_synth_state_get(zldev, synth_id);

- return false;
+ return zl3073x_synth_is_enabled(synth) && zl3073x_out_is_enabled(out);
}

/**
- * zl3073x_out_signal_format_get - get output signal format
+ * zl3073x_dev_out_signal_format_get - get output signal format
* @zldev: pointer to zl3073x device
* @index: output index
*
* Return: signal format of given output
*/
static inline u8
-zl3073x_out_signal_format_get(struct zl3073x_dev *zldev, u8 index)
+zl3073x_dev_out_signal_format_get(struct zl3073x_dev *zldev, u8 index)
{
- return zldev->out[index].signal_format;
+ const struct zl3073x_out *out = zl3073x_out_state_get(zldev, index);
+
+ return zl3073x_out_signal_format_get(out);
}

/**
- * zl3073x_out_dpll_get - get DPLL ID the output is driven by
+ * zl3073x_dev_out_dpll_get - get DPLL ID the output is driven by
* @zldev: pointer to zl3073x device
* @index: output index
*
* Return: ID of DPLL the given output is driven by
*/
static inline
-u8 zl3073x_out_dpll_get(struct zl3073x_dev *zldev, u8 index)
+u8 zl3073x_dev_out_dpll_get(struct zl3073x_dev *zldev, u8 index)
{
- u8 synth;
+ const struct zl3073x_out *out = zl3073x_out_state_get(zldev, index);
+ const struct zl3073x_synth *synth;
+ u8 synth_id;

/* Get synthesizer connected to given output */
- synth = zl3073x_out_synth_get(zldev, index);
+ synth_id = zl3073x_out_synth_get(out);
+ synth = zl3073x_synth_state_get(zldev, synth_id);

/* Return DPLL that drives the synth */
- return zl3073x_synth_dpll_get(zldev, synth);
+ return zl3073x_synth_dpll_get(synth);
}

/**
- * zl3073x_out_is_diff - check if the given output is differential
+ * zl3073x_dev_out_is_diff - check if the given output is differential
* @zldev: pointer to zl3073x device
* @index: output index
*
* Return: true if output is differential, false if output is single-ended
*/
static inline bool
-zl3073x_out_is_diff(struct zl3073x_dev *zldev, u8 index)
+zl3073x_dev_out_is_diff(struct zl3073x_dev *zldev, u8 index)
{
- switch (zl3073x_out_signal_format_get(zldev, index)) {
- case ZL_OUTPUT_MODE_SIGNAL_FORMAT_LVDS:
- case ZL_OUTPUT_MODE_SIGNAL_FORMAT_DIFF:
- case ZL_OUTPUT_MODE_SIGNAL_FORMAT_LOWVCM:
- return true;
- default:
- break;
- }
+ const struct zl3073x_out *out = zl3073x_out_state_get(zldev, index);

- return false;
+ return zl3073x_out_is_diff(out);
}

/**
- * zl3073x_output_pin_is_enabled - check if the given output pin is enabled
+ * zl3073x_dev_output_pin_is_enabled - check if the given output pin is enabled
* @zldev: pointer to zl3073x device
* @id: output pin id
*
@@ -392,16 +374,21 @@ zl3073x_out_is_diff(struct zl3073x_dev *zldev, u8 index)
* Return: true if output pin is enabled, false if output pin is disabled
*/
static inline bool
-zl3073x_output_pin_is_enabled(struct zl3073x_dev *zldev, u8 id)
+zl3073x_dev_output_pin_is_enabled(struct zl3073x_dev *zldev, u8 id)
{
- u8 output = zl3073x_output_pin_out_get(id);
+ u8 out_id = zl3073x_output_pin_out_get(id);
+ const struct zl3073x_out *out;
+
+ out = zl3073x_out_state_get(zldev, out_id);

- /* Check if the whole output is enabled */
- if (!zl3073x_out_is_enabled(zldev, output))
+ /* Check if the output is enabled - call _dev_ helper that
+ * additionally checks for attached synth enablement.
+ */
+ if (!zl3073x_dev_out_is_enabled(zldev, out_id))
return false;

/* Check signal format */
- switch (zl3073x_out_signal_format_get(zldev, output)) {
+ switch (zl3073x_out_signal_format_get(out)) {
case ZL_OUTPUT_MODE_SIGNAL_FORMAT_DISABLED:
/* Both output pins are disabled by signal format */
return false;
diff --git a/drivers/dpll/zl3073x/dpll.c b/drivers/dpll/zl3073x/dpll.c
index f93f9a458324..11ca32e1bb82 100644
--- a/drivers/dpll/zl3073x/dpll.c
+++ b/drivers/dpll/zl3073x/dpll.c
@@ -35,6 +35,7 @@
* @prio: pin priority <0, 14>
* @selectable: pin is selectable in automatic mode
* @esync_control: embedded sync is controllable
+ * @phase_gran: phase adjustment granularity
* @pin_state: last saved pin state
* @phase_offset: last saved pin phase offset
* @freq_offset: last saved fractional frequency offset
@@ -49,6 +50,7 @@ struct zl3073x_dpll_pin {
u8 prio;
bool selectable;
bool esync_control;
+ s32 phase_gran;
enum dpll_pin_state pin_state;
s64 phase_offset;
s64 freq_offset;
@@ -951,21 +953,19 @@ zl3073x_dpll_output_pin_esync_get(const struct dpll_pin *dpll_pin,
struct zl3073x_dpll *zldpll = dpll_priv;
struct zl3073x_dev *zldev = zldpll->dev;
struct zl3073x_dpll_pin *pin = pin_priv;
- struct device *dev = zldev->dev;
- u32 esync_period, esync_width;
- u8 clock_type, synth;
- u8 out, output_mode;
- u32 output_div;
+ const struct zl3073x_synth *synth;
+ const struct zl3073x_out *out;
+ u8 clock_type, out_id;
u32 synth_freq;
- int rc;

- out = zl3073x_output_pin_out_get(pin->id);
+ out_id = zl3073x_output_pin_out_get(pin->id);
+ out = zl3073x_out_state_get(zldev, out_id);

/* If N-division is enabled, esync is not supported. The register used
* for N-division is also used for the esync divider so both cannot
* be used.
*/
- switch (zl3073x_out_signal_format_get(zldev, out)) {
+ switch (zl3073x_out_signal_format_get(out)) {
case ZL_OUTPUT_MODE_SIGNAL_FORMAT_2_NDIV:
case ZL_OUTPUT_MODE_SIGNAL_FORMAT_2_NDIV_INV:
return -EOPNOTSUPP;
@@ -973,38 +973,11 @@ zl3073x_dpll_output_pin_esync_get(const struct dpll_pin *dpll_pin,
break;
}

- guard(mutex)(&zldev->multiop_lock);
-
- /* Read output configuration into mailbox */
- rc = zl3073x_mb_op(zldev, ZL_REG_OUTPUT_MB_SEM, ZL_OUTPUT_MB_SEM_RD,
- ZL_REG_OUTPUT_MB_MASK, BIT(out));
- if (rc)
- return rc;
-
- /* Read output mode */
- rc = zl3073x_read_u8(zldev, ZL_REG_OUTPUT_MODE, &output_mode);
- if (rc)
- return rc;
+ /* Get attached synth frequency */
+ synth = zl3073x_synth_state_get(zldev, zl3073x_out_synth_get(out));
+ synth_freq = zl3073x_synth_freq_get(synth);

- /* Read output divisor */
- rc = zl3073x_read_u32(zldev, ZL_REG_OUTPUT_DIV, &output_div);
- if (rc)
- return rc;
-
- /* Check output divisor for zero */
- if (!output_div) {
- dev_err(dev, "Zero divisor for OUTPUT%u got from device\n",
- out);
- return -EINVAL;
- }
-
- /* Get synth attached to output pin */
- synth = zl3073x_out_synth_get(zldev, out);
-
- /* Get synth frequency */
- synth_freq = zl3073x_synth_freq_get(zldev, synth);
-
- clock_type = FIELD_GET(ZL_OUTPUT_MODE_CLOCK_TYPE, output_mode);
+ clock_type = FIELD_GET(ZL_OUTPUT_MODE_CLOCK_TYPE, out->mode);
if (clock_type != ZL_OUTPUT_MODE_CLOCK_TYPE_ESYNC) {
/* No need to read esync data if it is not enabled */
esync->freq = 0;
@@ -1013,38 +986,21 @@ zl3073x_dpll_output_pin_esync_get(const struct dpll_pin *dpll_pin,
goto finish;
}

- /* Read esync period */
- rc = zl3073x_read_u32(zldev, ZL_REG_OUTPUT_ESYNC_PERIOD, &esync_period);
- if (rc)
- return rc;
-
- /* Check esync divisor for zero */
- if (!esync_period) {
- dev_err(dev, "Zero esync divisor for OUTPUT%u got from device\n",
- out);
- return -EINVAL;
- }
-
- /* Get esync pulse width in units of half synth cycles */
- rc = zl3073x_read_u32(zldev, ZL_REG_OUTPUT_ESYNC_WIDTH, &esync_width);
- if (rc)
- return rc;
-
/* Compute esync frequency */
- esync->freq = synth_freq / output_div / esync_period;
+ esync->freq = synth_freq / out->div / out->esync_n_period;

/* By comparing the esync_pulse_width to the half of the pulse width
* the esync pulse percentage can be determined.
* Note that half pulse width is in units of half synth cycles, which
* is why it reduces down to be output_div.
*/
- esync->pulse = (50 * esync_width) / output_div;
+ esync->pulse = (50 * out->esync_n_width) / out->div;

finish:
/* Set supported esync ranges if the pin supports esync control and
* if the output frequency is > 1 Hz.
*/
- if (pin->esync_control && (synth_freq / output_div) > 1) {
+ if (pin->esync_control && (synth_freq / out->div) > 1) {
esync->range = esync_freq_ranges;
esync->range_num = ARRAY_SIZE(esync_freq_ranges);
} else {
@@ -1062,21 +1018,22 @@ zl3073x_dpll_output_pin_esync_set(const struct dpll_pin *dpll_pin,
void *dpll_priv, u64 freq,
struct netlink_ext_ack *extack)
{
- u32 esync_period, esync_width, output_div;
struct zl3073x_dpll *zldpll = dpll_priv;
struct zl3073x_dev *zldev = zldpll->dev;
struct zl3073x_dpll_pin *pin = pin_priv;
- u8 clock_type, out, output_mode, synth;
+ const struct zl3073x_synth *synth;
+ struct zl3073x_out out;
+ u8 clock_type, out_id;
u32 synth_freq;
- int rc;

- out = zl3073x_output_pin_out_get(pin->id);
+ out_id = zl3073x_output_pin_out_get(pin->id);
+ out = *zl3073x_out_state_get(zldev, out_id);

/* If N-division is enabled, esync is not supported. The register used
* for N-division is also used for the esync divider so both cannot
* be used.
*/
- switch (zl3073x_out_signal_format_get(zldev, out)) {
+ switch (zl3073x_out_signal_format_get(&out)) {
case ZL_OUTPUT_MODE_SIGNAL_FORMAT_2_NDIV:
case ZL_OUTPUT_MODE_SIGNAL_FORMAT_2_NDIV_INV:
return -EOPNOTSUPP;
@@ -1084,19 +1041,6 @@ zl3073x_dpll_output_pin_esync_set(const struct dpll_pin *dpll_pin,
break;
}

- guard(mutex)(&zldev->multiop_lock);
-
- /* Read output configuration into mailbox */
- rc = zl3073x_mb_op(zldev, ZL_REG_OUTPUT_MB_SEM, ZL_OUTPUT_MB_SEM_RD,
- ZL_REG_OUTPUT_MB_MASK, BIT(out));
- if (rc)
- return rc;
-
- /* Read output mode */
- rc = zl3073x_read_u8(zldev, ZL_REG_OUTPUT_MODE, &output_mode);
- if (rc)
- return rc;
-
/* Select clock type */
if (freq)
clock_type = ZL_OUTPUT_MODE_CLOCK_TYPE_ESYNC;
@@ -1104,38 +1048,19 @@ zl3073x_dpll_output_pin_esync_set(const struct dpll_pin *dpll_pin,
clock_type = ZL_OUTPUT_MODE_CLOCK_TYPE_NORMAL;

/* Update clock type in output mode */
- output_mode &= ~ZL_OUTPUT_MODE_CLOCK_TYPE;
- output_mode |= FIELD_PREP(ZL_OUTPUT_MODE_CLOCK_TYPE, clock_type);
- rc = zl3073x_write_u8(zldev, ZL_REG_OUTPUT_MODE, output_mode);
- if (rc)
- return rc;
+ out.mode &= ~ZL_OUTPUT_MODE_CLOCK_TYPE;
+ out.mode |= FIELD_PREP(ZL_OUTPUT_MODE_CLOCK_TYPE, clock_type);

/* If esync is being disabled just write mailbox and finish */
if (!freq)
goto write_mailbox;

- /* Get synth attached to output pin */
- synth = zl3073x_out_synth_get(zldev, out);
-
- /* Get synth frequency */
- synth_freq = zl3073x_synth_freq_get(zldev, synth);
-
- rc = zl3073x_read_u32(zldev, ZL_REG_OUTPUT_DIV, &output_div);
- if (rc)
- return rc;
-
- /* Check output divisor for zero */
- if (!output_div) {
- dev_err(zldev->dev,
- "Zero divisor for OUTPUT%u got from device\n", out);
- return -EINVAL;
- }
+ /* Get attached synth frequency */
+ synth = zl3073x_synth_state_get(zldev, zl3073x_out_synth_get(&out));
+ synth_freq = zl3073x_synth_freq_get(synth);

/* Compute and update esync period */
- esync_period = synth_freq / (u32)freq / output_div;
- rc = zl3073x_write_u32(zldev, ZL_REG_OUTPUT_ESYNC_PERIOD, esync_period);
- if (rc)
- return rc;
+ out.esync_n_period = synth_freq / (u32)freq / out.div;

/* Half of the period in units of 1/2 synth cycle can be represented by
* the output_div. To get the supported esync pulse width of 25% of the
@@ -1143,15 +1068,11 @@ zl3073x_dpll_output_pin_esync_set(const struct dpll_pin *dpll_pin,
* assumes that output_div is even, otherwise some resolution will be
* lost.
*/
- esync_width = output_div / 2;
- rc = zl3073x_write_u32(zldev, ZL_REG_OUTPUT_ESYNC_WIDTH, esync_width);
- if (rc)
- return rc;
+ out.esync_n_width = out.div / 2;

write_mailbox:
/* Commit output configuration */
- return zl3073x_mb_op(zldev, ZL_REG_OUTPUT_MB_SEM, ZL_OUTPUT_MB_SEM_WR,
- ZL_REG_OUTPUT_MB_MASK, BIT(out));
+ return zl3073x_out_state_set(zldev, out_id, &out);
}

static int
@@ -1164,83 +1085,46 @@ zl3073x_dpll_output_pin_frequency_get(const struct dpll_pin *dpll_pin,
struct zl3073x_dpll *zldpll = dpll_priv;
struct zl3073x_dev *zldev = zldpll->dev;
struct zl3073x_dpll_pin *pin = pin_priv;
- struct device *dev = zldev->dev;
- u8 out, signal_format, synth;
- u32 output_div, synth_freq;
- int rc;
-
- out = zl3073x_output_pin_out_get(pin->id);
- synth = zl3073x_out_synth_get(zldev, out);
- synth_freq = zl3073x_synth_freq_get(zldev, synth);
-
- guard(mutex)(&zldev->multiop_lock);
-
- /* Read output configuration into mailbox */
- rc = zl3073x_mb_op(zldev, ZL_REG_OUTPUT_MB_SEM, ZL_OUTPUT_MB_SEM_RD,
- ZL_REG_OUTPUT_MB_MASK, BIT(out));
- if (rc)
- return rc;
+ const struct zl3073x_synth *synth;
+ const struct zl3073x_out *out;
+ u32 synth_freq;
+ u8 out_id;

- rc = zl3073x_read_u32(zldev, ZL_REG_OUTPUT_DIV, &output_div);
- if (rc)
- return rc;
+ out_id = zl3073x_output_pin_out_get(pin->id);
+ out = zl3073x_out_state_get(zldev, out_id);

- /* Check output divisor for zero */
- if (!output_div) {
- dev_err(dev, "Zero divisor for output %u got from device\n",
- out);
- return -EINVAL;
- }
+ /* Get attached synth frequency */
+ synth = zl3073x_synth_state_get(zldev, zl3073x_out_synth_get(out));
+ synth_freq = zl3073x_synth_freq_get(synth);

- /* Read used signal format for the given output */
- signal_format = zl3073x_out_signal_format_get(zldev, out);
-
- switch (signal_format) {
+ switch (zl3073x_out_signal_format_get(out)) {
case ZL_OUTPUT_MODE_SIGNAL_FORMAT_2_NDIV:
case ZL_OUTPUT_MODE_SIGNAL_FORMAT_2_NDIV_INV:
 * In case of divided format we have to distinguish between
* given output pin type.
+ *
+ * For P-pin the resulting frequency is computed as simple
+ * division of synth frequency and output divisor.
+ *
+ * For N-pin we have to divide additionally by divisor stored
+ * in esync_n_period output mailbox register that is used as
+ * N-pin divisor for these modes.
*/
- if (zl3073x_dpll_is_p_pin(pin)) {
- /* For P-pin the resulting frequency is computed as
- * simple division of synth frequency and output
- * divisor.
- */
- *frequency = synth_freq / output_div;
- } else {
- /* For N-pin we have to divide additionally by
- * divisor stored in esync_period output mailbox
- * register that is used as N-pin divisor for these
- * modes.
- */
- u32 ndiv;
-
- rc = zl3073x_read_u32(zldev, ZL_REG_OUTPUT_ESYNC_PERIOD,
- &ndiv);
- if (rc)
- return rc;
+ *frequency = synth_freq / out->div;

- /* Check N-pin divisor for zero */
- if (!ndiv) {
- dev_err(dev,
- "Zero N-pin divisor for output %u got from device\n",
- out);
- return -EINVAL;
- }
+ if (!zl3073x_dpll_is_p_pin(pin))
+ *frequency = (u32)*frequency / out->esync_n_period;

- /* Compute final divisor for N-pin */
- *frequency = synth_freq / output_div / ndiv;
- }
break;
default:
/* In other modes the resulting frequency is computed as
* division of synth frequency and output divisor.
*/
- *frequency = synth_freq / output_div;
+ *frequency = synth_freq / out->div;
break;
}

- return rc;
+ return 0;
}

static int
@@ -1253,28 +1137,21 @@ zl3073x_dpll_output_pin_frequency_set(const struct dpll_pin *dpll_pin,
struct zl3073x_dpll *zldpll = dpll_priv;
struct zl3073x_dev *zldev = zldpll->dev;
struct zl3073x_dpll_pin *pin = pin_priv;
- struct device *dev = zldev->dev;
- u32 output_n_freq, output_p_freq;
- u8 out, signal_format, synth;
- u32 cur_div, new_div, ndiv;
- u32 synth_freq;
- int rc;
+ const struct zl3073x_synth *synth;
+ u8 out_id, signal_format;
+ u32 new_div, synth_freq;
+ struct zl3073x_out out;
+
+ out_id = zl3073x_output_pin_out_get(pin->id);
+ out = *zl3073x_out_state_get(zldev, out_id);

- out = zl3073x_output_pin_out_get(pin->id);
- synth = zl3073x_out_synth_get(zldev, out);
- synth_freq = zl3073x_synth_freq_get(zldev, synth);
+ /* Get attached synth frequency and compute new divisor */
+ synth = zl3073x_synth_state_get(zldev, zl3073x_out_synth_get(&out));
+ synth_freq = zl3073x_synth_freq_get(synth);
new_div = synth_freq / (u32)frequency;

/* Get used signal format for the given output */
- signal_format = zl3073x_out_signal_format_get(zldev, out);
-
- guard(mutex)(&zldev->multiop_lock);
-
- /* Load output configuration */
- rc = zl3073x_mb_op(zldev, ZL_REG_OUTPUT_MB_SEM, ZL_OUTPUT_MB_SEM_RD,
- ZL_REG_OUTPUT_MB_MASK, BIT(out));
- if (rc)
- return rc;
+ signal_format = zl3073x_out_signal_format_get(&out);

/* Check signal format */
if (signal_format != ZL_OUTPUT_MODE_SIGNAL_FORMAT_2_NDIV &&
@@ -1282,99 +1159,50 @@ zl3073x_dpll_output_pin_frequency_set(const struct dpll_pin *dpll_pin,
/* For non N-divided signal formats the frequency is computed
* as division of synth frequency and output divisor.
*/
- rc = zl3073x_write_u32(zldev, ZL_REG_OUTPUT_DIV, new_div);
- if (rc)
- return rc;
+ out.div = new_div;

/* For 50/50 duty cycle the divisor is equal to width */
- rc = zl3073x_write_u32(zldev, ZL_REG_OUTPUT_WIDTH, new_div);
- if (rc)
- return rc;
+ out.width = new_div;

/* Commit output configuration */
- return zl3073x_mb_op(zldev,
- ZL_REG_OUTPUT_MB_SEM, ZL_OUTPUT_MB_SEM_WR,
- ZL_REG_OUTPUT_MB_MASK, BIT(out));
+ return zl3073x_out_state_set(zldev, out_id, &out);
}

- /* For N-divided signal format get current divisor */
- rc = zl3073x_read_u32(zldev, ZL_REG_OUTPUT_DIV, &cur_div);
- if (rc)
- return rc;
-
- /* Check output divisor for zero */
- if (!cur_div) {
- dev_err(dev, "Zero divisor for output %u got from device\n",
- out);
- return -EINVAL;
- }
-
- /* Get N-pin divisor (shares the same register with esync */
- rc = zl3073x_read_u32(zldev, ZL_REG_OUTPUT_ESYNC_PERIOD, &ndiv);
- if (rc)
- return rc;
-
- /* Check N-pin divisor for zero */
- if (!ndiv) {
- dev_err(dev,
- "Zero N-pin divisor for output %u got from device\n",
- out);
- return -EINVAL;
- }
-
- /* Compute current output frequency for P-pin */
- output_p_freq = synth_freq / cur_div;
-
- /* Compute current N-pin frequency */
- output_n_freq = output_p_freq / ndiv;
-
if (zl3073x_dpll_is_p_pin(pin)) {
/* We are going to change output frequency for P-pin but
* if the requested frequency is less than current N-pin
* frequency then indicate a failure as we are not able
* to compute N-pin divisor to keep its frequency unchanged.
+ *
+ * Update divisor for N-pin to keep N-pin frequency.
*/
- if (frequency <= output_n_freq)
+ out.esync_n_period = (out.esync_n_period * out.div) / new_div;
+ if (!out.esync_n_period)
return -EINVAL;

/* Update the output divisor */
- rc = zl3073x_write_u32(zldev, ZL_REG_OUTPUT_DIV, new_div);
- if (rc)
- return rc;
+ out.div = new_div;

/* For 50/50 duty cycle the divisor is equal to width */
- rc = zl3073x_write_u32(zldev, ZL_REG_OUTPUT_WIDTH, new_div);
- if (rc)
- return rc;
-
- /* Compute new divisor for N-pin */
- ndiv = (u32)frequency / output_n_freq;
+ out.width = out.div;
} else {
/* We are going to change frequency of N-pin but if
* the requested freq is greater or equal than freq of P-pin
* in the output pair we cannot compute divisor for the N-pin.
* In this case indicate a failure.
+ *
+ * Update divisor for N-pin
*/
- if (output_p_freq <= frequency)
+ out.esync_n_period = div64_u64(synth_freq, frequency * out.div);
+ if (!out.esync_n_period)
return -EINVAL;
-
- /* Compute new divisor for N-pin */
- ndiv = output_p_freq / (u32)frequency;
}

- /* Update divisor for the N-pin */
- rc = zl3073x_write_u32(zldev, ZL_REG_OUTPUT_ESYNC_PERIOD, ndiv);
- if (rc)
- return rc;
-
/* For 50/50 duty cycle the divisor is equal to width */
- rc = zl3073x_write_u32(zldev, ZL_REG_OUTPUT_ESYNC_WIDTH, ndiv);
- if (rc)
- return rc;
+ out.esync_n_width = out.esync_n_period;

/* Commit output configuration */
- return zl3073x_mb_op(zldev, ZL_REG_OUTPUT_MB_SEM, ZL_OUTPUT_MB_SEM_WR,
- ZL_REG_OUTPUT_MB_MASK, BIT(out));
+ return zl3073x_out_state_set(zldev, out_id, &out);
}

static int
@@ -1388,42 +1216,16 @@ zl3073x_dpll_output_pin_phase_adjust_get(const struct dpll_pin *dpll_pin,
struct zl3073x_dpll *zldpll = dpll_priv;
struct zl3073x_dev *zldev = zldpll->dev;
struct zl3073x_dpll_pin *pin = pin_priv;
- u32 synth_freq;
- s32 phase_comp;
- u8 out, synth;
- int rc;
-
- out = zl3073x_output_pin_out_get(pin->id);
- synth = zl3073x_out_synth_get(zldev, out);
- synth_freq = zl3073x_synth_freq_get(zldev, synth);
+ const struct zl3073x_out *out;
+ u8 out_id;

- /* Check synth freq for zero */
- if (!synth_freq) {
- dev_err(zldev->dev, "Got zero synth frequency for output %u\n",
- out);
- return -EINVAL;
- }
-
- guard(mutex)(&zldev->multiop_lock);
-
- /* Read output configuration */
- rc = zl3073x_mb_op(zldev, ZL_REG_OUTPUT_MB_SEM, ZL_OUTPUT_MB_SEM_RD,
- ZL_REG_OUTPUT_MB_MASK, BIT(out));
- if (rc)
- return rc;
-
- /* Read current output phase compensation */
- rc = zl3073x_read_u32(zldev, ZL_REG_OUTPUT_PHASE_COMP, &phase_comp);
- if (rc)
- return rc;
+ out_id = zl3073x_output_pin_out_get(pin->id);
+ out = zl3073x_out_state_get(zldev, out_id);

- /* Value in register is expressed in half synth clock cycles */
- phase_comp *= (int)div_u64(PSEC_PER_SEC, 2 * synth_freq);
+ /* The value in the register is expressed in half synth clock cycles. */
+ *phase_adjust = out->phase_comp * pin->phase_gran;

- /* Reverse two's complement negation applied during 'set' */
- *phase_adjust = -phase_comp;
-
- return rc;
+ return 0;
}

static int
@@ -1437,52 +1239,17 @@ zl3073x_dpll_output_pin_phase_adjust_set(const struct dpll_pin *dpll_pin,
struct zl3073x_dpll *zldpll = dpll_priv;
struct zl3073x_dev *zldev = zldpll->dev;
struct zl3073x_dpll_pin *pin = pin_priv;
- int half_synth_cycle;
- u32 synth_freq;
- u8 out, synth;
- int rc;
+ struct zl3073x_out out;
+ u8 out_id;

- /* Get attached synth */
- out = zl3073x_output_pin_out_get(pin->id);
- synth = zl3073x_out_synth_get(zldev, out);
+ out_id = zl3073x_output_pin_out_get(pin->id);
+ out = *zl3073x_out_state_get(zldev, out_id);

- /* Get synth's frequency */
- synth_freq = zl3073x_synth_freq_get(zldev, synth);
-
- /* Value in register is expressed in half synth clock cycles so
- * the given phase adjustment a multiple of half synth clock.
- */
- half_synth_cycle = (int)div_u64(PSEC_PER_SEC, 2 * synth_freq);
-
- if ((phase_adjust % half_synth_cycle) != 0) {
- NL_SET_ERR_MSG_FMT(extack,
- "Phase adjustment value has to be multiple of %d",
- half_synth_cycle);
- return -EINVAL;
- }
- phase_adjust /= half_synth_cycle;
-
- /* The value in the register is stored as two's complement negation
- * of requested value.
- */
- phase_adjust = -phase_adjust;
-
- guard(mutex)(&zldev->multiop_lock);
-
- /* Read output configuration */
- rc = zl3073x_mb_op(zldev, ZL_REG_OUTPUT_MB_SEM, ZL_OUTPUT_MB_SEM_RD,
- ZL_REG_OUTPUT_MB_MASK, BIT(out));
- if (rc)
- return rc;
-
- /* Write the requested value into the compensation register */
- rc = zl3073x_write_u32(zldev, ZL_REG_OUTPUT_PHASE_COMP, phase_adjust);
- if (rc)
- return rc;
+ /* The value in the register is expressed in half synth clock cycles. */
+ out.phase_comp = phase_adjust / pin->phase_gran;

/* Update output configuration from mailbox */
- return zl3073x_mb_op(zldev, ZL_REG_OUTPUT_MB_SEM, ZL_OUTPUT_MB_SEM_WR,
- ZL_REG_OUTPUT_MB_MASK, BIT(out));
+ return zl3073x_out_state_set(zldev, out_id, &out);
}

static int
@@ -1758,9 +1525,10 @@ zl3073x_dpll_pin_register(struct zl3073x_dpll_pin *pin, u32 index)
if (IS_ERR(props))
return PTR_ERR(props);

- /* Save package label & esync capability */
+ /* Save package label, esync capability and phase adjust granularity */
strscpy(pin->label, props->package_label);
pin->esync_control = props->esync_control;
+ pin->phase_gran = props->dpll_props.phase_gran;

if (zl3073x_dpll_is_input_pin(pin)) {
rc = zl3073x_dpll_ref_prio_get(pin, &pin->prio);
@@ -1886,25 +1654,23 @@ zl3073x_dpll_pin_is_registrable(struct zl3073x_dpll *zldpll,
if (zldpll->refsel_mode == ZL_DPLL_MODE_REFSEL_MODE_NCO)
return false;

- is_diff = zl3073x_ref_is_diff(zldev, ref);
- is_enabled = zl3073x_ref_is_enabled(zldev, ref);
+ is_diff = zl3073x_dev_ref_is_diff(zldev, ref);
+ is_enabled = zl3073x_dev_ref_is_enabled(zldev, ref);
} else {
/* Output P&N pair shares single HW output */
u8 out = zl3073x_output_pin_out_get(index);

- name = "OUT";
-
/* Skip the pin if it is connected to different DPLL channel */
- if (zl3073x_out_dpll_get(zldev, out) != zldpll->id) {
+ if (zl3073x_dev_out_dpll_get(zldev, out) != zldpll->id) {
dev_dbg(zldev->dev,
- "%s%u is driven by different DPLL\n", name,
- out);
+ "OUT%u is driven by different DPLL\n", out);

return false;
}

- is_diff = zl3073x_out_is_diff(zldev, out);
- is_enabled = zl3073x_output_pin_is_enabled(zldev, index);
+ name = "OUT";
+ is_diff = zl3073x_dev_out_is_diff(zldev, out);
+ is_enabled = zl3073x_dev_output_pin_is_enabled(zldev, index);
}

/* Skip N-pin if the corresponding input/output is differential */
@@ -2154,7 +1920,7 @@ zl3073x_dpll_pin_ffo_check(struct zl3073x_dpll_pin *pin)
return false;

/* Get the latest measured ref's ffo */
- ffo = zl3073x_ref_ffo_get(zldev, ref);
+ ffo = zl3073x_dev_ref_ffo_get(zldev, ref);

/* Compare with previous value */
if (pin->freq_offset != ffo) {
diff --git a/drivers/dpll/zl3073x/out.c b/drivers/dpll/zl3073x/out.c
new file mode 100644
index 000000000000..86829a0c1c02
--- /dev/null
+++ b/drivers/dpll/zl3073x/out.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/bitfield.h>
+#include <linux/cleanup.h>
+#include <linux/dev_printk.h>
+#include <linux/string.h>
+#include <linux/string_choices.h>
+#include <linux/types.h>
+
+#include "core.h"
+#include "out.h"
+
+/**
+ * zl3073x_out_state_fetch - fetch output state from hardware
+ * @zldev: pointer to zl3073x_dev structure
+ * @index: output index to fetch state for
+ *
+ * Function fetches state of the given output from hardware and stores it
+ * for later use.
+ *
+ * Return: 0 on success, <0 on error
+ */
+int zl3073x_out_state_fetch(struct zl3073x_dev *zldev, u8 index)
+{
+ struct zl3073x_out *out = &zldev->out[index];
+ int rc;
+
+ /* Read output configuration */
+ rc = zl3073x_read_u8(zldev, ZL_REG_OUTPUT_CTRL(index), &out->ctrl);
+ if (rc)
+ return rc;
+
+ dev_dbg(zldev->dev, "OUT%u is %s and connected to SYNTH%u\n", index,
+ str_enabled_disabled(zl3073x_out_is_enabled(out)),
+ zl3073x_out_synth_get(out));
+
+ guard(mutex)(&zldev->multiop_lock);
+
+ /* Read output configuration */
+ rc = zl3073x_mb_op(zldev, ZL_REG_OUTPUT_MB_SEM, ZL_OUTPUT_MB_SEM_RD,
+ ZL_REG_OUTPUT_MB_MASK, BIT(index));
+ if (rc)
+ return rc;
+
+ /* Read output mode */
+ rc = zl3073x_read_u8(zldev, ZL_REG_OUTPUT_MODE, &out->mode);
+ if (rc)
+ return rc;
+
+ dev_dbg(zldev->dev, "OUT%u has signal format 0x%02x\n", index,
+ zl3073x_out_signal_format_get(out));
+
+ /* Read output divisor */
+ rc = zl3073x_read_u32(zldev, ZL_REG_OUTPUT_DIV, &out->div);
+ if (rc)
+ return rc;
+
+ if (!out->div) {
+ dev_err(zldev->dev, "Zero divisor for OUT%u got from device\n",
+ index);
+ return -EINVAL;
+ }
+
+ dev_dbg(zldev->dev, "OUT%u divisor: %u\n", index, out->div);
+
+ /* Read output width */
+ rc = zl3073x_read_u32(zldev, ZL_REG_OUTPUT_WIDTH, &out->width);
+ if (rc)
+ return rc;
+
+ rc = zl3073x_read_u32(zldev, ZL_REG_OUTPUT_ESYNC_PERIOD,
+ &out->esync_n_period);
+ if (rc)
+ return rc;
+
+ if (!out->esync_n_period) {
+ dev_err(zldev->dev,
+ "Zero esync divisor for OUT%u got from device\n",
+ index);
+ return -EINVAL;
+ }
+
+ rc = zl3073x_read_u32(zldev, ZL_REG_OUTPUT_ESYNC_WIDTH,
+ &out->esync_n_width);
+ if (rc)
+ return rc;
+
+ rc = zl3073x_read_u32(zldev, ZL_REG_OUTPUT_PHASE_COMP,
+ &out->phase_comp);
+ if (rc)
+ return rc;
+
+ return rc;
+}
+
+/**
+ * zl3073x_out_state_get - get current output state
+ * @zldev: pointer to zl3073x_dev structure
+ * @index: output index to get state for
+ *
+ * Return: pointer to given output state
+ */
+const struct zl3073x_out *zl3073x_out_state_get(struct zl3073x_dev *zldev,
+ u8 index)
+{
+ return &zldev->out[index];
+}
+
+int zl3073x_out_state_set(struct zl3073x_dev *zldev, u8 index,
+ const struct zl3073x_out *out)
+{
+ struct zl3073x_out *dout = &zldev->out[index];
+ int rc;
+
+ guard(mutex)(&zldev->multiop_lock);
+
+ /* Read output configuration into mailbox */
+ rc = zl3073x_mb_op(zldev, ZL_REG_OUTPUT_MB_SEM, ZL_OUTPUT_MB_SEM_RD,
+ ZL_REG_OUTPUT_MB_MASK, BIT(index));
+ if (rc)
+ return rc;
+
+ /* Update mailbox with changed values */
+ if (dout->div != out->div)
+ rc = zl3073x_write_u32(zldev, ZL_REG_OUTPUT_DIV, out->div);
+ if (!rc && dout->width != out->width)
+ rc = zl3073x_write_u32(zldev, ZL_REG_OUTPUT_WIDTH, out->width);
+ if (!rc && dout->esync_n_period != out->esync_n_period)
+ rc = zl3073x_write_u32(zldev, ZL_REG_OUTPUT_ESYNC_PERIOD,
+ out->esync_n_period);
+ if (!rc && dout->esync_n_width != out->esync_n_width)
+ rc = zl3073x_write_u32(zldev, ZL_REG_OUTPUT_ESYNC_WIDTH,
+ out->esync_n_width);
+ if (!rc && dout->mode != out->mode)
+ rc = zl3073x_write_u8(zldev, ZL_REG_OUTPUT_MODE, out->mode);
+ if (!rc && dout->phase_comp != out->phase_comp)
+ rc = zl3073x_write_u32(zldev, ZL_REG_OUTPUT_PHASE_COMP,
+ out->phase_comp);
+ if (rc)
+ return rc;
+
+ /* Commit output configuration */
+ rc = zl3073x_mb_op(zldev, ZL_REG_OUTPUT_MB_SEM, ZL_OUTPUT_MB_SEM_WR,
+ ZL_REG_OUTPUT_MB_MASK, BIT(index));
+ if (rc)
+ return rc;
+
+ /* After successful commit store new state */
+ dout->div = out->div;
+ dout->width = out->width;
+ dout->esync_n_period = out->esync_n_period;
+ dout->esync_n_width = out->esync_n_width;
+ dout->mode = out->mode;
+ dout->phase_comp = out->phase_comp;
+
+ return 0;
+}
diff --git a/drivers/dpll/zl3073x/out.h b/drivers/dpll/zl3073x/out.h
new file mode 100644
index 000000000000..e8ea7a0e0f07
--- /dev/null
+++ b/drivers/dpll/zl3073x/out.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ZL3073X_OUT_H
+#define _ZL3073X_OUT_H
+
+#include <linux/bitfield.h>
+#include <linux/types.h>
+
+#include "regs.h"
+
+struct zl3073x_dev;
+
+/**
+ * struct zl3073x_out - output state
+ * @div: output divisor
+ * @width: output pulse width
+ * @esync_n_period: embedded sync or n-pin period (for n-div formats)
+ * @esync_n_width: embedded sync or n-pin pulse width
+ * @phase_comp: phase compensation
+ * @ctrl: output control
+ * @mode: output mode
+ */
+struct zl3073x_out {
+ u32 div;
+ u32 width;
+ u32 esync_n_period;
+ u32 esync_n_width;
+ s32 phase_comp;
+ u8 ctrl;
+ u8 mode;
+};
+
+int zl3073x_out_state_fetch(struct zl3073x_dev *zldev, u8 index);
+const struct zl3073x_out *zl3073x_out_state_get(struct zl3073x_dev *zldev,
+ u8 index);
+
+int zl3073x_out_state_set(struct zl3073x_dev *zldev, u8 index,
+ const struct zl3073x_out *out);
+
+/**
+ * zl3073x_out_signal_format_get - get output signal format
+ * @out: pointer to out state
+ *
+ * Return: signal format of given output
+ */
+static inline u8 zl3073x_out_signal_format_get(const struct zl3073x_out *out)
+{
+ return FIELD_GET(ZL_OUTPUT_MODE_SIGNAL_FORMAT, out->mode);
+}
+
+/**
+ * zl3073x_out_is_diff - check if the given output is differential
+ * @out: pointer to out state
+ *
+ * Return: true if output is differential, false if output is single-ended
+ */
+static inline bool zl3073x_out_is_diff(const struct zl3073x_out *out)
+{
+ switch (zl3073x_out_signal_format_get(out)) {
+ case ZL_OUTPUT_MODE_SIGNAL_FORMAT_LVDS:
+ case ZL_OUTPUT_MODE_SIGNAL_FORMAT_DIFF:
+ case ZL_OUTPUT_MODE_SIGNAL_FORMAT_LOWVCM:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+/**
+ * zl3073x_out_is_enabled - check if the given output is enabled
+ * @out: pointer to out state
+ *
+ * Return: true if output is enabled, false if output is disabled
+ */
+static inline bool zl3073x_out_is_enabled(const struct zl3073x_out *out)
+{
+ return !!FIELD_GET(ZL_OUTPUT_CTRL_EN, out->ctrl);
+}
+
+/**
+ * zl3073x_out_synth_get - get synth connected to given output
+ * @out: pointer to out state
+ *
+ * Return: index of synth connected to given output.
+ */
+static inline u8 zl3073x_out_synth_get(const struct zl3073x_out *out)
+{
+ return FIELD_GET(ZL_OUTPUT_CTRL_SYNTH_SEL, out->ctrl);
+}
+
+#endif /* _ZL3073X_OUT_H */
diff --git a/drivers/dpll/zl3073x/prop.c b/drivers/dpll/zl3073x/prop.c
index 4cf7e8aefcb3..4ed153087570 100644
--- a/drivers/dpll/zl3073x/prop.c
+++ b/drivers/dpll/zl3073x/prop.c
@@ -46,10 +46,10 @@ zl3073x_pin_check_freq(struct zl3073x_dev *zldev, enum dpll_pin_direction dir,

/* Get output pin synthesizer */
out = zl3073x_output_pin_out_get(id);
- synth = zl3073x_out_synth_get(zldev, out);
+ synth = zl3073x_dev_out_synth_get(zldev, out);

/* Get synth frequency */
- synth_freq = zl3073x_synth_freq_get(zldev, synth);
+ synth_freq = zl3073x_dev_synth_freq_get(zldev, synth);

/* Check the frequency divides synth frequency */
if (synth_freq % (u32)freq)
@@ -93,13 +93,13 @@ zl3073x_prop_pin_package_label_set(struct zl3073x_dev *zldev,

prefix = "REF";
ref = zl3073x_input_pin_ref_get(id);
- is_diff = zl3073x_ref_is_diff(zldev, ref);
+ is_diff = zl3073x_dev_ref_is_diff(zldev, ref);
} else {
u8 out;

prefix = "OUT";
out = zl3073x_output_pin_out_get(id);
- is_diff = zl3073x_out_is_diff(zldev, out);
+ is_diff = zl3073x_dev_out_is_diff(zldev, out);
}

if (!is_diff)
@@ -208,7 +208,18 @@ struct zl3073x_pin_props *zl3073x_pin_props_get(struct zl3073x_dev *zldev,
DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE |
DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE;
} else {
+ u8 out, synth;
+ u32 f;
+
props->dpll_props.type = DPLL_PIN_TYPE_GNSS;
+
+ /* The output pin phase adjustment granularity equals half of
+ * the synth frequency count.
+ */
+ out = zl3073x_output_pin_out_get(index);
+ synth = zl3073x_dev_out_synth_get(zldev, out);
+ f = 2 * zl3073x_dev_synth_freq_get(zldev, synth);
+ props->dpll_props.phase_gran = f ? div_u64(PSEC_PER_SEC, f) : 1;
}

props->dpll_props.phase_range.min = S32_MIN;
diff --git a/drivers/dpll/zl3073x/ref.c b/drivers/dpll/zl3073x/ref.c
new file mode 100644
index 000000000000..6abd6288a02a
--- /dev/null
+++ b/drivers/dpll/zl3073x/ref.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/bitfield.h>
+#include <linux/cleanup.h>
+#include <linux/dev_printk.h>
+#include <linux/string.h>
+#include <linux/string_choices.h>
+#include <linux/types.h>
+
+#include "core.h"
+#include "ref.h"
+
+/**
+ * zl3073x_ref_freq_factorize - factorize given frequency
+ * @freq: input frequency
+ * @base: base frequency
+ * @mult: multiplier
+ *
+ * Checks if the given frequency can be factorized using one of the
+ * supported base frequencies. If so the base frequency and multiplier
+ * are stored into appropriate parameters if they are not NULL.
+ *
+ * Return: 0 on success, -EINVAL if the frequency cannot be factorized
+ */
+int
+zl3073x_ref_freq_factorize(u32 freq, u16 *base, u16 *mult)
+{
+ static const u16 base_freqs[] = {
+ 1, 2, 4, 5, 8, 10, 16, 20, 25, 32, 40, 50, 64, 80, 100, 125,
+ 128, 160, 200, 250, 256, 320, 400, 500, 625, 640, 800, 1000,
+ 1250, 1280, 1600, 2000, 2500, 3125, 3200, 4000, 5000, 6250,
+ 6400, 8000, 10000, 12500, 15625, 16000, 20000, 25000, 31250,
+ 32000, 40000, 50000, 62500,
+ };
+ u32 div;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(base_freqs); i++) {
+ div = freq / base_freqs[i];
+
+ if (div <= U16_MAX && (freq % base_freqs[i]) == 0) {
+ if (base)
+ *base = base_freqs[i];
+ if (mult)
+ *mult = div;
+
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
+/**
+ * zl3073x_ref_state_fetch - fetch input reference state from hardware
+ * @zldev: pointer to zl3073x_dev structure
+ * @index: input reference index to fetch state for
+ *
+ * Function fetches state for the given input reference from hardware and
+ * stores it for later use.
+ *
+ * Return: 0 on success, <0 on error
+ */
+int zl3073x_ref_state_fetch(struct zl3073x_dev *zldev, u8 index)
+{
+ struct zl3073x_ref *ref = &zldev->ref[index];
+ int rc;
+
+ /* For differential type inputs the N-pin reference shares
+ * part of the configuration with the P-pin counterpart.
+ */
+ if (zl3073x_is_n_pin(index) && zl3073x_ref_is_diff(ref - 1)) {
+ struct zl3073x_ref *p_ref = &zldev->ref[index - 1];
+
+ /* Copy the shared items from the P-pin */
+ ref->config = p_ref->config;
+
+ return 0; /* Finish - no non-shared items for now */
+ }
+
+ guard(mutex)(&zldev->multiop_lock);
+
+ /* Read reference configuration */
+ rc = zl3073x_mb_op(zldev, ZL_REG_REF_MB_SEM, ZL_REF_MB_SEM_RD,
+ ZL_REG_REF_MB_MASK, BIT(index));
+ if (rc)
+ return rc;
+
+ /* Read ref_config register */
+ rc = zl3073x_read_u8(zldev, ZL_REG_REF_CONFIG, &ref->config);
+ if (rc)
+ return rc;
+
+ dev_dbg(zldev->dev, "REF%u is %s and configured as %s\n", index,
+ str_enabled_disabled(zl3073x_ref_is_enabled(ref)),
+ zl3073x_ref_is_diff(ref) ? "differential" : "single-ended");
+
+ return rc;
+}
+
+/**
+ * zl3073x_ref_state_get - get current input reference state
+ * @zldev: pointer to zl3073x_dev structure
+ * @index: input reference index to get state for
+ *
+ * Return: pointer to given input reference state
+ */
+const struct zl3073x_ref *
+zl3073x_ref_state_get(struct zl3073x_dev *zldev, u8 index)
+{
+ return &zldev->ref[index];
+}
diff --git a/drivers/dpll/zl3073x/ref.h b/drivers/dpll/zl3073x/ref.h
new file mode 100644
index 000000000000..e72f2c875087
--- /dev/null
+++ b/drivers/dpll/zl3073x/ref.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ZL3073X_REF_H
+#define _ZL3073X_REF_H
+
+#include <linux/bitfield.h>
+#include <linux/types.h>
+
+#include "regs.h"
+
+struct zl3073x_dev;
+
+/**
+ * struct zl3073x_ref - input reference state
+ * @ffo: current fractional frequency offset
+ * @config: reference config
+ */
+struct zl3073x_ref {
+ s64 ffo;
+ u8 config;
+};
+
+int zl3073x_ref_state_fetch(struct zl3073x_dev *zldev, u8 index);
+
+const struct zl3073x_ref *zl3073x_ref_state_get(struct zl3073x_dev *zldev,
+ u8 index);
+
+int zl3073x_ref_freq_factorize(u32 freq, u16 *base, u16 *mult);
+
+/**
+ * zl3073x_ref_ffo_get - get current fractional frequency offset
+ * @ref: pointer to ref state
+ *
+ * Return: the latest measured fractional frequency offset
+ */
+static inline s64
+zl3073x_ref_ffo_get(const struct zl3073x_ref *ref)
+{
+ return ref->ffo;
+}
+
+/**
+ * zl3073x_ref_is_diff - check if the given input reference is differential
+ * @ref: pointer to ref state
+ *
+ * Return: true if reference is differential, false if reference is single-ended
+ */
+static inline bool
+zl3073x_ref_is_diff(const struct zl3073x_ref *ref)
+{
+ return !!FIELD_GET(ZL_REF_CONFIG_DIFF_EN, ref->config);
+}
+
+/**
+ * zl3073x_ref_is_enabled - check if the given input reference is enabled
+ * @ref: pointer to ref state
+ *
+ * Return: true if input reference is enabled, false otherwise
+ */
+static inline bool
+zl3073x_ref_is_enabled(const struct zl3073x_ref *ref)
+{
+ return !!FIELD_GET(ZL_REF_CONFIG_ENABLE, ref->config);
+}
+
+#endif /* _ZL3073X_REF_H */
diff --git a/drivers/dpll/zl3073x/synth.c b/drivers/dpll/zl3073x/synth.c
new file mode 100644
index 000000000000..da839572dab2
--- /dev/null
+++ b/drivers/dpll/zl3073x/synth.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/bitfield.h>
+#include <linux/cleanup.h>
+#include <linux/dev_printk.h>
+#include <linux/string.h>
+#include <linux/string_choices.h>
+#include <linux/types.h>
+
+#include "core.h"
+#include "synth.h"
+
+/**
+ * zl3073x_synth_state_fetch - fetch synth state from hardware
+ * @zldev: pointer to zl3073x_dev structure
+ * @index: synth index to fetch state for
+ *
+ * Function fetches state of the given synthesizer from the hardware and
+ * stores it for later use.
+ *
+ * Return: 0 on success, <0 on error
+ */
+int zl3073x_synth_state_fetch(struct zl3073x_dev *zldev, u8 index)
+{
+ struct zl3073x_synth *synth = &zldev->synth[index];
+ int rc;
+
+ /* Read synth control register */
+ rc = zl3073x_read_u8(zldev, ZL_REG_SYNTH_CTRL(index), &synth->ctrl);
+ if (rc)
+ return rc;
+
+ guard(mutex)(&zldev->multiop_lock);
+
+ /* Read synth configuration */
+ rc = zl3073x_mb_op(zldev, ZL_REG_SYNTH_MB_SEM, ZL_SYNTH_MB_SEM_RD,
+ ZL_REG_SYNTH_MB_MASK, BIT(index));
+ if (rc)
+ return rc;
+
+ /* The output frequency is determined by the following formula:
+ * base * multiplier * numerator / denominator
+ *
+ * Read registers with these values
+ */
+ rc = zl3073x_read_u16(zldev, ZL_REG_SYNTH_FREQ_BASE, &synth->freq_base);
+ if (rc)
+ return rc;
+
+ rc = zl3073x_read_u32(zldev, ZL_REG_SYNTH_FREQ_MULT, &synth->freq_mult);
+ if (rc)
+ return rc;
+
+ rc = zl3073x_read_u16(zldev, ZL_REG_SYNTH_FREQ_M, &synth->freq_m);
+ if (rc)
+ return rc;
+
+ rc = zl3073x_read_u16(zldev, ZL_REG_SYNTH_FREQ_N, &synth->freq_n);
+ if (rc)
+ return rc;
+
+ /* Check denominator for zero to avoid div by 0 */
+ if (!synth->freq_n) {
+ dev_err(zldev->dev,
+ "Zero divisor for SYNTH%u retrieved from device\n",
+ index);
+ return -EINVAL;
+ }
+
+ dev_dbg(zldev->dev, "SYNTH%u frequency: %u Hz\n", index,
+ zl3073x_synth_freq_get(synth));
+
+ return rc;
+}
+
+/**
+ * zl3073x_synth_state_get - get current synth state
+ * @zldev: pointer to zl3073x_dev structure
+ * @index: synth index to get state for
+ *
+ * Return: pointer to given synth state
+ */
+const struct zl3073x_synth *zl3073x_synth_state_get(struct zl3073x_dev *zldev,
+ u8 index)
+{
+ return &zldev->synth[index];
+}
diff --git a/drivers/dpll/zl3073x/synth.h b/drivers/dpll/zl3073x/synth.h
new file mode 100644
index 000000000000..6c55eb8a888c
--- /dev/null
+++ b/drivers/dpll/zl3073x/synth.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ZL3073X_SYNTH_H
+#define _ZL3073X_SYNTH_H
+
+#include <linux/bitfield.h>
+#include <linux/math64.h>
+#include <linux/types.h>
+
+#include "regs.h"
+
+struct zl3073x_dev;
+
+/**
+ * struct zl3073x_synth - synthesizer state
+ * @freq_mult: frequency multiplier
+ * @freq_base: frequency base
+ * @freq_m: frequency numerator
+ * @freq_n: frequency denominator
+ * @ctrl: synth control
+ */
+struct zl3073x_synth {
+ u32 freq_mult;
+ u16 freq_base;
+ u16 freq_m;
+ u16 freq_n;
+ u8 ctrl;
+};
+
+int zl3073x_synth_state_fetch(struct zl3073x_dev *zldev, u8 synth_id);
+
+const struct zl3073x_synth *zl3073x_synth_state_get(struct zl3073x_dev *zldev,
+ u8 synth_id);
+
+int zl3073x_synth_state_set(struct zl3073x_dev *zldev, u8 synth_id,
+ const struct zl3073x_synth *synth);
+
+/**
+ * zl3073x_synth_dpll_get - get DPLL ID the synth is driven by
+ * @synth: pointer to synth state
+ *
+ * Return: ID of DPLL the given synthesizer is driven by
+ */
+static inline u8 zl3073x_synth_dpll_get(const struct zl3073x_synth *synth)
+{
+ return FIELD_GET(ZL_SYNTH_CTRL_DPLL_SEL, synth->ctrl);
+}
+
+/**
+ * zl3073x_synth_freq_get - get synth current freq
+ * @synth: pointer to synth state
+ *
+ * Return: frequency of given synthesizer
+ */
+static inline u32 zl3073x_synth_freq_get(const struct zl3073x_synth *synth)
+{
+ return mul_u64_u32_div(synth->freq_base * synth->freq_m,
+ synth->freq_mult, synth->freq_n);
+}
+
+/**
+ * zl3073x_synth_is_enabled - check if the given synth is enabled
+ * @synth: pointer to synth state
+ *
+ * Return: true if synth is enabled, false otherwise
+ */
+static inline bool zl3073x_synth_is_enabled(const struct zl3073x_synth *synth)
+{
+ return FIELD_GET(ZL_SYNTH_CTRL_EN, synth->ctrl);
+}
+
+#endif /* _ZL3073X_SYNTH_H */
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index 0c5b94e64ea1..4edd2088c2db 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -1563,8 +1563,7 @@ static int altr_portb_setup(struct altr_edac_device_dev *device)
goto err_release_group_1;
}
rc = devm_request_irq(&altdev->ddev, altdev->sb_irq,
- prv->ecc_irq_handler,
- IRQF_ONESHOT | IRQF_TRIGGER_HIGH,
+ prv->ecc_irq_handler, IRQF_TRIGGER_HIGH,
ecc_name, altdev);
if (rc) {
edac_printk(KERN_ERR, EDAC_DEVICE, "PortB SBERR IRQ error\n");
@@ -1587,8 +1586,7 @@ static int altr_portb_setup(struct altr_edac_device_dev *device)
goto err_release_group_1;
}
rc = devm_request_irq(&altdev->ddev, altdev->db_irq,
- prv->ecc_irq_handler,
- IRQF_ONESHOT | IRQF_TRIGGER_HIGH,
+ prv->ecc_irq_handler, IRQF_TRIGGER_HIGH,
ecc_name, altdev);
if (rc) {
edac_printk(KERN_ERR, EDAC_DEVICE, "PortB DBERR IRQ error\n");
@@ -1970,8 +1968,7 @@ static int altr_edac_a10_device_add(struct altr_arria10_edac *edac,
goto err_release_group1;
}
rc = devm_request_irq(edac->dev, altdev->sb_irq, prv->ecc_irq_handler,
- IRQF_ONESHOT | IRQF_TRIGGER_HIGH,
- ecc_name, altdev);
+ IRQF_TRIGGER_HIGH, ecc_name, altdev);
if (rc) {
edac_printk(KERN_ERR, EDAC_DEVICE, "No SBERR IRQ resource\n");
goto err_release_group1;
@@ -1993,7 +1990,7 @@ static int altr_edac_a10_device_add(struct altr_arria10_edac *edac,
goto err_release_group1;
}
rc = devm_request_irq(edac->dev, altdev->db_irq, prv->ecc_irq_handler,
- IRQF_ONESHOT | IRQF_TRIGGER_HIGH,
+ IRQF_TRIGGER_HIGH,
ecc_name, altdev);
if (rc) {
edac_printk(KERN_ERR, EDAC_DEVICE, "No DBERR IRQ resource\n");
diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c
index 4a1bebc1ff14..471b8540d18b 100644
--- a/drivers/edac/i5000_edac.c
+++ b/drivers/edac/i5000_edac.c
@@ -1111,6 +1111,7 @@ static void calculate_dimm_size(struct i5000_pvt *pvt)

n = snprintf(p, space, " ");
p += n;
+ space -= n;
for (branch = 0; branch < MAX_BRANCHES; branch++) {
n = snprintf(p, space, " branch %d | ", branch);
p += n;
diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c
index b5cf25905b05..fb49a1d1df11 100644
--- a/drivers/edac/i5400_edac.c
+++ b/drivers/edac/i5400_edac.c
@@ -1026,13 +1026,13 @@ static void calculate_dimm_size(struct i5400_pvt *pvt)
space -= n;
}

- space -= n;
edac_dbg(2, "%s\n", mem_buffer);
p = mem_buffer;
space = PAGE_SIZE;

n = snprintf(p, space, " ");
p += n;
+ space -= n;
for (branch = 0; branch < MAX_BRANCHES; branch++) {
n = snprintf(p, space, " branch %d | ", branch);
p += n;
diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c
index c72ee4756585..c501c3104b3a 100644
--- a/drivers/firmware/arm_ffa/driver.c
+++ b/drivers/firmware/arm_ffa/driver.c
@@ -981,10 +981,27 @@ static void __do_sched_recv_cb(u16 part_id, u16 vcpu, bool is_per_vcpu)
}
}

+/*
+ * Map logical ID index to the u16 index within the packed ID list.
+ *
+ * For native responses (FF-A width == kernel word size), IDs are
+ * tightly packed: idx -> idx.
+ *
+ * For 32-bit responses on a 64-bit kernel, each 64-bit register
+ * contributes 4 x u16 values but only the lower 2 are defined; the
+ * upper 2 are garbage. This mapping skips those upper halves:
+ * 0,1,2,3,4,5,... -> 0,1,4,5,8,9,...
+ */
+static int list_idx_to_u16_idx(int idx, bool is_native_resp)
+{
+ return is_native_resp ? idx : idx + 2 * (idx >> 1);
+}
+
static void ffa_notification_info_get(void)
{
- int idx, list, max_ids, lists_cnt, ids_processed, ids_count[MAX_IDS_64];
- bool is_64b_resp;
+ int ids_processed, ids_count[MAX_IDS_64];
+ int idx, list, max_ids, lists_cnt;
+ bool is_64b_resp, is_native_resp;
ffa_value_t ret;
u64 id_list;

@@ -1001,6 +1018,7 @@ static void ffa_notification_info_get(void)
}

is_64b_resp = (ret.a0 == FFA_FN64_SUCCESS);
+ is_native_resp = (ret.a0 == FFA_FN_NATIVE(SUCCESS));

ids_processed = 0;
lists_cnt = FIELD_GET(NOTIFICATION_INFO_GET_ID_COUNT, ret.a2);
@@ -1017,12 +1035,16 @@ static void ffa_notification_info_get(void)

/* Process IDs */
for (list = 0; list < lists_cnt; list++) {
+ int u16_idx;
u16 vcpu_id, part_id, *packed_id_list = (u16 *)&ret.a3;

if (ids_processed >= max_ids - 1)
break;

- part_id = packed_id_list[ids_processed++];
+ u16_idx = list_idx_to_u16_idx(ids_processed,
+ is_native_resp);
+ part_id = packed_id_list[u16_idx];
+ ids_processed++;

if (ids_count[list] == 1) { /* Global Notification */
__do_sched_recv_cb(part_id, 0, false);
@@ -1034,7 +1056,10 @@ static void ffa_notification_info_get(void)
if (ids_processed >= max_ids - 1)
break;

- vcpu_id = packed_id_list[ids_processed++];
+ u16_idx = list_idx_to_u16_idx(ids_processed,
+ is_native_resp);
+ vcpu_id = packed_id_list[u16_idx];
+ ids_processed++;

__do_sched_recv_cb(part_id, vcpu_id, true);
}
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index fc407d891348..c3cf5541ed68 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -691,13 +691,13 @@ static __init int match_config_table(const efi_guid_t *guid,

static __init void reserve_unaccepted(struct efi_unaccepted_memory *unaccepted)
{
- phys_addr_t start, size;
+ phys_addr_t start, end;

start = PAGE_ALIGN_DOWN(efi.unaccepted);
- size = PAGE_ALIGN(sizeof(*unaccepted) + unaccepted->size);
+ end = PAGE_ALIGN(efi.unaccepted + sizeof(*unaccepted) + unaccepted->size);

- memblock_add(start, size);
- memblock_reserve(start, size);
+ memblock_add(start, end - start);
+ memblock_reserve(start, end - start);
}

int __init efi_config_parse_tables(const efi_config_table_t *config_tables,
diff --git a/drivers/gpio/gpio-amd-fch.c b/drivers/gpio/gpio-amd-fch.c
index e6c6c3ec7656..9f329938202b 100644
--- a/drivers/gpio/gpio-amd-fch.c
+++ b/drivers/gpio/gpio-amd-fch.c
@@ -8,6 +8,7 @@
*
*/

+#include <linux/bitfield.h>
#include <linux/err.h>
#include <linux/io.h>
#include <linux/kernel.h>
@@ -120,15 +121,15 @@ static int amd_fch_gpio_get(struct gpio_chip *gc,
unsigned int offset)
{
unsigned long flags;
- int ret;
+ u32 val;
struct amd_fch_gpio_priv *priv = gpiochip_get_data(gc);
void __iomem *ptr = amd_fch_gpio_addr(priv, offset);

spin_lock_irqsave(&priv->lock, flags);
- ret = (readl_relaxed(ptr) & AMD_FCH_GPIO_FLAG_READ);
+ val = readl_relaxed(ptr);
spin_unlock_irqrestore(&priv->lock, flags);

- return ret;
+ return FIELD_GET(AMD_FCH_GPIO_FLAG_READ, val);
}

static int amd_fch_gpio_request(struct gpio_chip *chip,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 6f5b4a0e0a34..803b6fc360a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -274,6 +274,8 @@ extern int amdgpu_rebar;
extern int amdgpu_wbrf;
extern int amdgpu_user_queue;

+extern uint amdgpu_hdmi_hpd_debounce_delay_ms;
+
#define AMDGPU_VM_MAX_NUM_CTX 4096
#define AMDGPU_SG_THRESHOLD (256*1024*1024)
#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 6c62e27b9800..67db986eda3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -1136,8 +1136,10 @@ static int amdgpu_acpi_enumerate_xcc(void)
if (!dev_info)
ret = amdgpu_acpi_dev_init(&dev_info, xcc_info, sbdf);

- if (ret == -ENOMEM)
+ if (ret == -ENOMEM) {
+ kfree(xcc_info);
return ret;
+ }

if (!dev_info) {
kfree(xcc_info);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index ec9516d6ae97..335f7e2f4ce5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -246,6 +246,7 @@ int amdgpu_damage_clips = -1; /* auto */
int amdgpu_umsch_mm_fwlog;
int amdgpu_rebar = -1; /* auto */
int amdgpu_user_queue = -1;
+uint amdgpu_hdmi_hpd_debounce_delay_ms;

DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0,
"DRM_UT_CORE",
@@ -1128,6 +1129,16 @@ module_param_named(rebar, amdgpu_rebar, int, 0444);
MODULE_PARM_DESC(user_queue, "Enable user queues (-1 = auto (default), 0 = disable, 1 = enable, 2 = enable UQs and disable KQs)");
module_param_named(user_queue, amdgpu_user_queue, int, 0444);

+/*
+ * DOC: hdmi_hpd_debounce_delay_ms (uint)
+ * HDMI HPD disconnect debounce delay in milliseconds.
+ *
+ * Used to filter short disconnect->reconnect HPD toggles some HDMI sinks
+ * generate while entering/leaving power save. Set to 0 to disable by default.
+ */
+MODULE_PARM_DESC(hdmi_hpd_debounce_delay_ms, "HDMI HPD disconnect debounce delay in milliseconds (0 to disable (by default), 1500 is common)");
+module_param_named(hdmi_hpd_debounce_delay_ms, amdgpu_hdmi_hpd_debounce_delay_ms, uint, 0644);
+
/* These devices are not supported by amdgpu.
* They are supported by the mach64, r128, radeon drivers
*/
@@ -3155,7 +3166,6 @@ static int __init amdgpu_init(void)
if (r)
goto error_fence;

- DRM_INFO("amdgpu kernel modesetting enabled.\n");
amdgpu_register_atpx_handler();
amdgpu_acpi_detect();

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index aef1ba1bdca9..01ad5cc008a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -1381,7 +1381,7 @@ int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev,
if (!*exp_ranges)
*exp_ranges = range_cnt;
err:
- kfree(ranges);
+ kvfree(ranges);

return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index d020a890a0ea..630af847f29f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -130,11 +130,9 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
}

/* attempt a per ring reset */
- if (unlikely(adev->debug_disable_gpu_ring_reset)) {
- dev_err(adev->dev, "Ring reset disabled by debug mask\n");
- } else if (amdgpu_gpu_recovery &&
- amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_PER_QUEUE) &&
- ring->funcs->reset) {
+ if (amdgpu_gpu_recovery &&
+ amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_PER_QUEUE) &&
+ ring->funcs->reset) {
dev_err(adev->dev, "Starting %s ring reset\n",
s_job->sched->name);
r = amdgpu_ring_reset(ring, job->vmid, &job->hw_fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index e0ee21150860..3fd19859055a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -4137,7 +4137,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
* to handle fatal error */
r = amdgpu_nbio_ras_sw_init(adev);
if (r)
- return r;
+ goto release_con;

if (adev->nbio.ras &&
adev->nbio.ras->init_ras_controller_interrupt) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 5ec5c3ff22bb..304564ec2f59 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -460,9 +460,6 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
ktime_t deadline;
bool ret;

- if (unlikely(ring->adev->debug_disable_soft_recovery))
- return false;
-
deadline = ktime_add_us(ktime_get(), 10000);

if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 726b2bdfbba3..003bcece715e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4956,7 +4956,8 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
adev->gfx.compute_supported_reset =
amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
- if (!amdgpu_sriov_vf(adev)) {
+ if (!amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index c936772c0372..1dd9fd486eec 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -1821,13 +1821,15 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(11, 0, 3):
if ((adev->gfx.me_fw_version >= 2280) &&
(adev->gfx.mec_fw_version >= 2410) &&
- !amdgpu_sriov_vf(adev)) {
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
}
break;
default:
- if (!amdgpu_sriov_vf(adev)) {
+ if (!amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index f80e9e356e25..50e39b9d9df6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -1547,7 +1547,8 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(12, 0, 1):
if ((adev->gfx.me_fw_version >= 2660) &&
(adev->gfx.mec_fw_version >= 2920) &&
- !amdgpu_sriov_vf(adev)) {
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index dd19a97436db..7d0a2d239b78 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2409,7 +2409,7 @@ static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
adev->gfx.compute_supported_reset =
amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
- if (!amdgpu_sriov_vf(adev))
+ if (!amdgpu_sriov_vf(adev) && !adev->debug_disable_gpu_ring_reset)
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;

r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index c90cbe053ef3..a4ebb6c5af55 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -1149,14 +1149,16 @@ static int gfx_v9_4_3_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
if ((adev->gfx.mec_fw_version >= 155) &&
- !amdgpu_sriov_vf(adev)) {
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE;
}
break;
case IP_VERSION(9, 5, 0):
if ((adev->gfx.mec_fw_version >= 21) &&
- !amdgpu_sriov_vf(adev)) {
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index 36b1ca73c2ed..a1443990d5c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -2361,11 +2361,15 @@ static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
- if ((adev->gfx.mec_fw_version >= 0xb0) && amdgpu_dpm_reset_sdma_is_supported(adev))
+ if ((adev->gfx.mec_fw_version >= 0xb0) &&
+ amdgpu_dpm_reset_sdma_is_supported(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
break;
case IP_VERSION(9, 5, 0):
- if ((adev->gfx.mec_fw_version >= 0xf) && amdgpu_dpm_reset_sdma_is_supported(adev))
+ if ((adev->gfx.mec_fw_version >= 0xf) &&
+ amdgpu_dpm_reset_sdma_is_supported(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 7dc67a22a7a0..45e2933214a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -1424,17 +1424,9 @@ static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block)

adev->sdma.supported_reset =
amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
- switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
- case IP_VERSION(5, 0, 0):
- case IP_VERSION(5, 0, 2):
- case IP_VERSION(5, 0, 5):
- if ((adev->sdma.instance[0].fw_version >= 35) &&
- !amdgpu_sriov_vf(adev))
- adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
- break;
- default:
- break;
- }
+ if (!amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;

/* Allocate memory for SDMA IP Dump buffer */
ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 3bd44c24f692..5b982cc91af3 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -1342,23 +1342,9 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block)

adev->sdma.supported_reset =
amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
- switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
- case IP_VERSION(5, 2, 0):
- case IP_VERSION(5, 2, 2):
- case IP_VERSION(5, 2, 3):
- case IP_VERSION(5, 2, 4):
- if ((adev->sdma.instance[0].fw_version >= 76) &&
- !amdgpu_sriov_vf(adev))
- adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
- break;
- case IP_VERSION(5, 2, 5):
- if ((adev->sdma.instance[0].fw_version >= 34) &&
- !amdgpu_sriov_vf(adev))
- adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
- break;
- default:
- break;
- }
+ if (!amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;

/* Allocate memory for SDMA IP Dump buffer */
ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index 3c6568d50199..6809c6d4be5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -1351,17 +1351,9 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)

adev->sdma.supported_reset =
amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
- switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
- case IP_VERSION(6, 0, 0):
- case IP_VERSION(6, 0, 2):
- case IP_VERSION(6, 0, 3):
- if ((adev->sdma.instance[0].fw_version >= 21) &&
- !amdgpu_sriov_vf(adev))
- adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
- break;
- default:
- break;
- }
+ if (!amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;

if (amdgpu_sdma_ras_sw_init(adev)) {
dev_err(adev->dev, "Failed to initialize sdma ras block!\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
index 326ecc8d37d2..2b81344dcd66 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
@@ -1337,7 +1337,8 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block)

adev->sdma.supported_reset =
amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
- if (!amdgpu_sriov_vf(adev))
+ if (!amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;

r = amdgpu_sdma_sysfs_reset_mask_init(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index 8897dcc9c1a0..e35fae9cdaf6 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -1964,7 +1964,8 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev)
struct mmsch_v2_0_cmd_end end = { {0} };
struct mmsch_v2_0_init_header *header;
uint32_t *init_table = adev->virt.mm_table.cpu_addr;
- uint8_t i = 0;
+
+ /* This path only programs VCN instance 0. */

header = (struct mmsch_v2_0_init_header *)init_table;
direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
@@ -1983,93 +1984,93 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev)
size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4);

MMSCH_V2_0_INSERT_DIRECT_RD_MOD_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS),
0xFFFFFFFF, 0x00000004);

/* mc resume*/
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(UVD, 0,
mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo);
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(UVD, 0,
mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi);
offset = 0;
} else {
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(UVD, 0,
mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst->gpu_addr));
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(UVD, 0,
mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst->gpu_addr));
offset = size;
}

MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0),
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0),
0);
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE0),
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE0),
size);

MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(UVD, 0,
mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst->gpu_addr + offset));
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(UVD, 0,
mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst->gpu_addr + offset));
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET1),
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1),
0);
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE1),
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE1),
AMDGPU_VCN_STACK_SIZE);

MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(UVD, 0,
mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst->gpu_addr + offset +
AMDGPU_VCN_STACK_SIZE));
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(UVD, 0,
mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst->gpu_addr + offset +
AMDGPU_VCN_STACK_SIZE));
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET2),
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2),
0);
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE2),
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE2),
AMDGPU_VCN_CONTEXT_SIZE);

for (r = 0; r < adev->vcn.inst[0].num_enc_rings; ++r) {
ring = &adev->vcn.inst->ring_enc[r];
ring->wptr = 0;
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_LO),
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO),
lower_32_bits(ring->gpu_addr));
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_HI),
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI),
upper_32_bits(ring->gpu_addr));
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_RB_SIZE),
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE),
ring->ring_size / 4);
}

ring = &adev->vcn.inst->ring_dec;
ring->wptr = 0;
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(UVD, 0,
mmUVD_LMI_RBC_RB_64BIT_BAR_LOW),
lower_32_bits(ring->gpu_addr));
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(UVD, 0,
mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH),
upper_32_bits(ring->gpu_addr));
/* force RBC into idle state */
@@ -2080,7 +2081,7 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_RBC_RB_CNTL), tmp);
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), tmp);

/* add end packet */
tmp = sizeof(struct mmsch_v2_0_cmd_end);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index ba99e0f258ae..986cb297de8f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -401,27 +401,25 @@ static int kfd_dbg_get_dev_watch_id(struct kfd_process_device *pdd, int *watch_i
return -ENOMEM;
}

-static void kfd_dbg_clear_dev_watch_id(struct kfd_process_device *pdd, int watch_id)
+static void kfd_dbg_clear_dev_watch_id(struct kfd_process_device *pdd, u32 watch_id)
{
spin_lock(&pdd->dev->watch_points_lock);

/* process owns device watch point so safe to clear */
- if ((pdd->alloc_watch_ids >> watch_id) & 0x1) {
- pdd->alloc_watch_ids &= ~(0x1 << watch_id);
- pdd->dev->alloc_watch_ids &= ~(0x1 << watch_id);
+ if (pdd->alloc_watch_ids & BIT(watch_id)) {
+ pdd->alloc_watch_ids &= ~BIT(watch_id);
+ pdd->dev->alloc_watch_ids &= ~BIT(watch_id);
}

spin_unlock(&pdd->dev->watch_points_lock);
}

-static bool kfd_dbg_owns_dev_watch_id(struct kfd_process_device *pdd, int watch_id)
+static bool kfd_dbg_owns_dev_watch_id(struct kfd_process_device *pdd, u32 watch_id)
{
bool owns_watch_id = false;

spin_lock(&pdd->dev->watch_points_lock);
- owns_watch_id = watch_id < MAX_WATCH_ADDRESSES &&
- ((pdd->alloc_watch_ids >> watch_id) & 0x1);
-
+ owns_watch_id = pdd->alloc_watch_ids & BIT(watch_id);
spin_unlock(&pdd->dev->watch_points_lock);

return owns_watch_id;
@@ -432,6 +430,9 @@ int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd,
{
int r;

+ if (watch_id >= MAX_WATCH_ADDRESSES)
+ return -EINVAL;
+
if (!kfd_dbg_owns_dev_watch_id(pdd, watch_id))
return -EINVAL;

@@ -469,6 +470,9 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
if (r)
return r;

+ if (*watch_id >= MAX_WATCH_ADDRESSES)
+ return -EINVAL;
+
if (!pdd->dev->kfd->shared_resources.enable_mes) {
r = debug_lock_and_unmap(pdd->dev->dqm);
if (r) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index ddfe30c13e9d..8ed513a77d38 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1992,7 +1992,7 @@ static int signal_eviction_fence(struct kfd_process *p)
ef = dma_fence_get_rcu_safe(&p->ef);
rcu_read_unlock();
if (!ef)
- return -EINVAL;
+ return true;

ret = dma_fence_signal(ef);
dma_fence_put(ef);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 6252afd1d087..a0077fe79ed2 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -7548,10 +7548,12 @@ static void amdgpu_dm_connector_destroy(struct drm_connector *connector)
drm_dp_mst_topology_mgr_destroy(&aconnector->mst_mgr);

/* Cancel and flush any pending HDMI HPD debounce work */
- cancel_delayed_work_sync(&aconnector->hdmi_hpd_debounce_work);
- if (aconnector->hdmi_prev_sink) {
- dc_sink_release(aconnector->hdmi_prev_sink);
- aconnector->hdmi_prev_sink = NULL;
+ if (aconnector->hdmi_hpd_debounce_delay_ms) {
+ cancel_delayed_work_sync(&aconnector->hdmi_hpd_debounce_work);
+ if (aconnector->hdmi_prev_sink) {
+ dc_sink_release(aconnector->hdmi_prev_sink);
+ aconnector->hdmi_prev_sink = NULL;
+ }
}

if (aconnector->bl_idx != -1) {
@@ -8715,9 +8717,18 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
mutex_init(&aconnector->hpd_lock);
mutex_init(&aconnector->handle_mst_msg_ready);

- aconnector->hdmi_hpd_debounce_delay_ms = AMDGPU_DM_HDMI_HPD_DEBOUNCE_MS;
- INIT_DELAYED_WORK(&aconnector->hdmi_hpd_debounce_work, hdmi_hpd_debounce_work);
- aconnector->hdmi_prev_sink = NULL;
+ /*
+ * If HDMI HPD debounce delay is set, use the minimum between selected
+ * value and AMDGPU_DM_MAX_HDMI_HPD_DEBOUNCE_MS
+ */
+ if (amdgpu_hdmi_hpd_debounce_delay_ms) {
+ aconnector->hdmi_hpd_debounce_delay_ms = min(amdgpu_hdmi_hpd_debounce_delay_ms,
+ AMDGPU_DM_MAX_HDMI_HPD_DEBOUNCE_MS);
+ INIT_DELAYED_WORK(&aconnector->hdmi_hpd_debounce_work, hdmi_hpd_debounce_work);
+ aconnector->hdmi_prev_sink = NULL;
+ } else {
+ aconnector->hdmi_hpd_debounce_delay_ms = 0;
+ }

/*
* configure support HPD hot plug connector_>polled default value is 0
@@ -12027,10 +12038,9 @@ static int dm_crtc_get_cursor_mode(struct amdgpu_device *adev,

/* Overlay cursor not supported on HW before DCN
* DCN401 does not have the cursor-on-scaled-plane or cursor-on-yuv-plane restrictions
- * as previous DCN generations, so enable native mode on DCN401 in addition to DCE
+ * as previous DCN generations, so enable native mode on DCN401
*/
- if (amdgpu_ip_version(adev, DCE_HWIP, 0) == 0 ||
- amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(4, 0, 1)) {
+ if (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(4, 0, 1)) {
*cursor_mode = DM_CURSOR_NATIVE_MODE;
return 0;
}
@@ -12350,6 +12360,12 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
* need to be added for DC to not disable a plane by mistake
*/
if (dm_new_crtc_state->cursor_mode == DM_CURSOR_OVERLAY_MODE) {
+ if (amdgpu_ip_version(adev, DCE_HWIP, 0) == 0) {
+ drm_dbg(dev, "Overlay cursor not supported on DCE\n");
+ ret = -EINVAL;
+ goto fail;
+ }
+
ret = drm_atomic_add_affected_planes(state, crtc);
if (ret)
goto fail;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 8ca738957598..adcd7ea69671 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -59,7 +59,10 @@

#define AMDGPU_HDR_MULT_DEFAULT (0x100000000LL)

-#define AMDGPU_DM_HDMI_HPD_DEBOUNCE_MS 1500
+/*
+ * Maximum HDMI HPD debounce delay in milliseconds
+ */
+#define AMDGPU_DM_MAX_HDMI_HPD_DEBOUNCE_MS 5000
/*
#include "include/amdgpu_dal_power_if.h"
#include "amdgpu_dm_irq.h"
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index e027798ece03..9bb7475e80ba 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1059,10 +1059,15 @@ static void amdgpu_dm_plane_get_min_max_dc_plane_scaling(struct drm_device *dev,
*min_downscale = plane_cap->max_downscale_factor.nv12;
break;

+ /* All 64 bpp formats have the same fp16 scaling limits */
case DRM_FORMAT_XRGB16161616F:
case DRM_FORMAT_ARGB16161616F:
case DRM_FORMAT_XBGR16161616F:
case DRM_FORMAT_ABGR16161616F:
+ case DRM_FORMAT_XRGB16161616:
+ case DRM_FORMAT_ARGB16161616:
+ case DRM_FORMAT_XBGR16161616:
+ case DRM_FORMAT_ABGR16161616:
*max_upscale = plane_cap->max_upscale_factor.fp16;
*min_downscale = plane_cap->max_downscale_factor.fp16;
break;
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c
index 82cc78c291d8..12c2a0d9fb2a 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c
@@ -1226,12 +1226,12 @@ static struct stream_encoder *dcn315_stream_encoder_create(
/*PHYB is wired off in HW, allow front end to remapping, otherwise needs more changes*/

/* Mapping of VPG, AFMT, DME register blocks to DIO block instance */
- if (eng_id <= ENGINE_ID_DIGF) {
- vpg_inst = eng_id;
- afmt_inst = eng_id;
- } else
+ if (eng_id < 0 || eng_id >= ARRAY_SIZE(stream_enc_regs))
return NULL;

+ vpg_inst = eng_id;
+ afmt_inst = eng_id;
+
enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL);
vpg = dcn31_vpg_create(ctx, vpg_inst);
afmt = dcn31_afmt_create(ctx, afmt_inst);
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c
index 636110e48d01..3c77c14c5a5e 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c
@@ -1220,12 +1220,12 @@ static struct stream_encoder *dcn316_stream_encoder_create(
int afmt_inst;

/* Mapping of VPG, AFMT, DME register blocks to DIO block instance */
- if (eng_id <= ENGINE_ID_DIGF) {
- vpg_inst = eng_id;
- afmt_inst = eng_id;
- } else
+ if (eng_id < 0 || eng_id >= ARRAY_SIZE(stream_enc_regs))
return NULL;

+ vpg_inst = eng_id;
+ afmt_inst = eng_id;
+
enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL);
vpg = dcn31_vpg_create(ctx, vpg_inst);
afmt = dcn31_afmt_create(ctx, afmt_inst);
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
index 3965a7f1b64b..9cace432ce36 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
@@ -1208,12 +1208,12 @@ static struct stream_encoder *dcn32_stream_encoder_create(
int afmt_inst;

/* Mapping of VPG, AFMT, DME register blocks to DIO block instance */
- if (eng_id <= ENGINE_ID_DIGF) {
- vpg_inst = eng_id;
- afmt_inst = eng_id;
- } else
+ if (eng_id < 0 || eng_id >= ARRAY_SIZE(stream_enc_regs))
return NULL;

+ vpg_inst = eng_id;
+ afmt_inst = eng_id;
+
enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL);
vpg = dcn32_vpg_create(ctx, vpg_inst);
afmt = dcn32_afmt_create(ctx, afmt_inst);
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
index ad214986f7ac..26fd5c03c014 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
@@ -1189,12 +1189,12 @@ static struct stream_encoder *dcn321_stream_encoder_create(
int afmt_inst;

/* Mapping of VPG, AFMT, DME register blocks to DIO block instance */
- if (eng_id <= ENGINE_ID_DIGF) {
- vpg_inst = eng_id;
- afmt_inst = eng_id;
- } else
+ if (eng_id < 0 || eng_id >= ARRAY_SIZE(stream_enc_regs))
return NULL;

+ vpg_inst = eng_id;
+ afmt_inst = eng_id;
+
enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL);
vpg = dcn321_vpg_create(ctx, vpg_inst);
afmt = dcn321_afmt_create(ctx, afmt_inst);
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
index 06bec7dcc755..e8d74ceb9dc2 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
@@ -1271,12 +1271,12 @@ static struct stream_encoder *dcn35_stream_encoder_create(
int afmt_inst;

/* Mapping of VPG, AFMT, DME register blocks to DIO block instance */
- if (eng_id <= ENGINE_ID_DIGF) {
- vpg_inst = eng_id;
- afmt_inst = eng_id;
- } else
+ if (eng_id < 0 || eng_id >= ARRAY_SIZE(stream_enc_regs))
return NULL;

+ vpg_inst = eng_id;
+ afmt_inst = eng_id;
+
enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL);
vpg = dcn31_vpg_create(ctx, vpg_inst);
afmt = dcn31_afmt_create(ctx, afmt_inst);
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
index 7974e306126e..532e5d9bc433 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
@@ -1251,12 +1251,12 @@ static struct stream_encoder *dcn35_stream_encoder_create(
int afmt_inst;

/* Mapping of VPG, AFMT, DME register blocks to DIO block instance */
- if (eng_id <= ENGINE_ID_DIGF) {
- vpg_inst = eng_id;
- afmt_inst = eng_id;
- } else
+ if (eng_id < 0 || eng_id >= ARRAY_SIZE(stream_enc_regs))
return NULL;

+ vpg_inst = eng_id;
+ afmt_inst = eng_id;
+
enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL);
vpg = dcn31_vpg_create(ctx, vpg_inst);
afmt = dcn31_afmt_create(ctx, afmt_inst);
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f2c92902e4a3..3f1a9892f2a3 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -419,6 +419,7 @@ void drm_buddy_fini(struct drm_buddy *mm)

for_each_free_tree(i)
kfree(mm->free_trees[i]);
+ kfree(mm->free_trees);
kfree(mm->roots);
}
EXPORT_SYMBOL(drm_buddy_fini);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h
index 23646e55f142..06c29ff2aac0 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h
@@ -199,6 +199,7 @@ struct drm_exynos_file_private {
struct exynos_drm_private {
struct device *g2d_dev;
struct device *dma_dev;
+ struct device *vidi_dev;
void *mapping;

/* for atomic commit */
diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
index b80410a3e4aa..37733f2ac0e7 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
@@ -231,9 +231,14 @@ ATTRIBUTE_GROUPS(vidi);
int vidi_connection_ioctl(struct drm_device *drm_dev, void *data,
struct drm_file *file_priv)
{
- struct vidi_context *ctx = dev_get_drvdata(drm_dev->dev);
+ struct exynos_drm_private *priv = drm_dev->dev_private;
+ struct device *dev = priv ? priv->vidi_dev : NULL;
+ struct vidi_context *ctx = dev ? dev_get_drvdata(dev) : NULL;
struct drm_exynos_vidi_connection *vidi = data;

+ if (!ctx)
+ return -ENODEV;
+
if (!vidi) {
DRM_DEV_DEBUG_KMS(ctx->dev,
"user data for vidi is null.\n");
@@ -257,13 +262,27 @@ int vidi_connection_ioctl(struct drm_device *drm_dev, void *data,

if (vidi->connection) {
const struct drm_edid *drm_edid;
- const struct edid *raw_edid;
+ const void __user *edid_userptr = u64_to_user_ptr(vidi->edid);
+ void *edid_buf;
+ struct edid hdr;
size_t size;

- raw_edid = (const struct edid *)(unsigned long)vidi->edid;
- size = (raw_edid->extensions + 1) * EDID_LENGTH;
+ if (copy_from_user(&hdr, edid_userptr, sizeof(hdr)))
+ return -EFAULT;
+
+ size = (hdr.extensions + 1) * EDID_LENGTH;
+
+ edid_buf = kmalloc(size, GFP_KERNEL);
+ if (!edid_buf)
+ return -ENOMEM;
+
+ if (copy_from_user(edid_buf, edid_userptr, size)) {
+ kfree(edid_buf);
+ return -EFAULT;
+ }

- drm_edid = drm_edid_alloc(raw_edid, size);
+ drm_edid = drm_edid_alloc(edid_buf, size);
+ kfree(edid_buf);
if (!drm_edid)
return -ENOMEM;

@@ -393,6 +412,7 @@ static int vidi_bind(struct device *dev, struct device *master, void *data)
{
struct vidi_context *ctx = dev_get_drvdata(dev);
struct drm_device *drm_dev = data;
+ struct exynos_drm_private *priv = drm_dev->dev_private;
struct drm_encoder *encoder = &ctx->encoder;
struct exynos_drm_plane *exynos_plane;
struct exynos_drm_plane_config plane_config = { 0 };
@@ -400,6 +420,8 @@ static int vidi_bind(struct device *dev, struct device *master, void *data)
int ret;

ctx->drm_dev = drm_dev;
+ if (priv)
+ priv->vidi_dev = dev;

plane_config.pixel_formats = formats;
plane_config.num_pixel_formats = ARRAY_SIZE(formats);
@@ -445,8 +467,12 @@ static int vidi_bind(struct device *dev, struct device *master, void *data)
static void vidi_unbind(struct device *dev, struct device *master, void *data)
{
struct vidi_context *ctx = dev_get_drvdata(dev);
+ struct drm_device *drm_dev = data;
+ struct exynos_drm_private *priv = drm_dev->dev_private;

timer_delete_sync(&ctx->timer);
+ if (priv)
+ priv->vidi_dev = NULL;
}

static const struct component_ops vidi_component_ops = {
diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h
index 4add05c7f161..f9ee7ebfec55 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h
+++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h
@@ -40,6 +40,10 @@ struct hibmc_dp_dev {
struct mutex lock; /* protects concurrent RW in hibmc_dp_reg_write_field() */
struct hibmc_dp_link link;
u8 dpcd[DP_RECEIVER_CAP_SIZE];
+ u8 downstream_ports[DP_MAX_DOWNSTREAM_PORTS];
+ struct drm_dp_desc desc;
+ bool is_branch;
+ int hpd_status;
void __iomem *serdes_base;
};

diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_config.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_config.h
index 08f9e1caf7fc..efb30a758475 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_config.h
+++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_config.h
@@ -17,5 +17,7 @@
#define HIBMC_DP_LINK_RATE_CAL 27
#define HIBMC_DP_SYNC_DELAY(lanes) ((lanes) == 0x2 ? 86 : 46)
#define HIBMC_DP_INT_ENABLE 0xc
+/* HIBMC_DP_LINK_RATE_CAL * 10000 * 80% = 216000 */
+#define DP_MODE_VALI_CAL 216000

#endif
diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c
index 8f0daec7d174..d5bd3c45649b 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c
+++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c
@@ -2,6 +2,7 @@
// Copyright (c) 2024 Hisilicon Limited.

#include <linux/io.h>
+#include <linux/iopoll.h>
#include <linux/delay.h>
#include "dp_config.h"
#include "dp_comm.h"
@@ -176,13 +177,18 @@ int hibmc_dp_hw_init(struct hibmc_dp *dp)
dp_dev->link.cap.lanes = 0x2;
dp_dev->link.cap.link_rate = DP_LINK_BW_8_1;

- /* hdcp data */
- writel(HIBMC_DP_HDCP, dp_dev->base + HIBMC_DP_HDCP_CFG);
/* int init */
writel(0, dp_dev->base + HIBMC_DP_INTR_ENABLE);
writel(HIBMC_DP_INT_RST, dp_dev->base + HIBMC_DP_INTR_ORIGINAL_STATUS);
+ /* clr colorbar */
+ writel(0, dp_dev->base + HIBMC_DP_COLOR_BAR_CTRL);
/* rst */
+ writel(0, dp_dev->base + HIBMC_DP_DPTX_RST_CTRL);
+ usleep_range(30, 50);
+ /* de-rst */
writel(HIBMC_DP_DPTX_RST, dp_dev->base + HIBMC_DP_DPTX_RST_CTRL);
+ /* hdcp data */
+ writel(HIBMC_DP_HDCP, dp_dev->base + HIBMC_DP_HDCP_CFG);
/* clock enable */
writel(HIBMC_DP_CLK_EN, dp_dev->base + HIBMC_DP_DPTX_CLK_CTRL);

@@ -263,6 +269,16 @@ void hibmc_dp_reset_link(struct hibmc_dp *dp)
dp->dp_dev->link.status.channel_equalized = false;
}

+u8 hibmc_dp_get_link_rate(struct hibmc_dp *dp)
+{
+ return dp->dp_dev->link.cap.link_rate;
+}
+
+u8 hibmc_dp_get_lanes(struct hibmc_dp *dp)
+{
+ return dp->dp_dev->link.cap.lanes;
+}
+
static const struct hibmc_dp_color_raw g_rgb_raw[] = {
{CBAR_COLOR_BAR, 0x000, 0x000, 0x000},
{CBAR_WHITE, 0xfff, 0xfff, 0xfff},
@@ -305,3 +321,21 @@ void hibmc_dp_set_cbar(struct hibmc_dp *dp, const struct hibmc_dp_cbar_cfg *cfg)
hibmc_dp_reg_write_field(dp_dev, HIBMC_DP_COLOR_BAR_CTRL, BIT(0), cfg->enable);
writel(HIBMC_DP_SYNC_EN_MASK, dp_dev->base + HIBMC_DP_TIMING_SYNC_CTRL);
}
+
+bool hibmc_dp_check_hpd_status(struct hibmc_dp *dp, int exp_status)
+{
+ u32 status;
+ int ret;
+
+ ret = readl_poll_timeout(dp->dp_dev->base + HIBMC_DP_HPD_STATUS, status,
+ FIELD_GET(HIBMC_DP_HPD_CUR_STATE, status) == exp_status,
+ 1000, 100000); /* DP spec says 100ms */
+ if (ret) {
+ drm_dbg_dp(dp->drm_dev, "wait hpd status timeout");
+ return false;
+ }
+
+ dp->dp_dev->hpd_status = exp_status;
+
+ return true;
+}
diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h
index 665f5b166dfb..31316fe1ea8d 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h
+++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h
@@ -14,6 +14,11 @@

struct hibmc_dp_dev;

+enum hibmc_hpd_status {
+ HIBMC_HPD_OUT,
+ HIBMC_HPD_IN,
+};
+
enum hibmc_dp_cbar_pattern {
CBAR_COLOR_BAR,
CBAR_WHITE,
@@ -60,5 +65,8 @@ void hibmc_dp_reset_link(struct hibmc_dp *dp);
void hibmc_dp_hpd_cfg(struct hibmc_dp *dp);
void hibmc_dp_enable_int(struct hibmc_dp *dp);
void hibmc_dp_disable_int(struct hibmc_dp *dp);
+bool hibmc_dp_check_hpd_status(struct hibmc_dp *dp, int exp_status);
+u8 hibmc_dp_get_link_rate(struct hibmc_dp *dp);
+u8 hibmc_dp_get_lanes(struct hibmc_dp *dp);

#endif
diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h
index 394b1e933c3a..64306abcd986 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h
+++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h
@@ -24,6 +24,9 @@
#define HIBMC_DP_CFG_AUX_READY_DATA_BYTE GENMASK(16, 12)
#define HIBMC_DP_CFG_AUX GENMASK(24, 17)

+#define HIBMC_DP_HPD_STATUS 0x98
+#define HIBMC_DP_HPD_CUR_STATE GENMASK(7, 4)
+
#define HIBMC_DP_PHYIF_CTRL0 0xa0
#define HIBMC_DP_CFG_SCRAMBLE_EN BIT(0)
#define HIBMC_DP_CFG_PAT_SEL GENMASK(7, 4)
diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c
index d06832e62e96..616821e3c933 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c
+++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c
@@ -12,6 +12,8 @@

#include "hibmc_drm_drv.h"
#include "dp/dp_hw.h"
+#include "dp/dp_comm.h"
+#include "dp/dp_config.h"

#define DP_MASKED_SINK_HPD_PLUG_INT BIT(2)

@@ -31,17 +33,76 @@ static int hibmc_dp_connector_get_modes(struct drm_connector *connector)
return count;
}

+static bool hibmc_dp_get_dpcd(struct hibmc_dp_dev *dp_dev)
+{
+ int ret;
+
+ ret = drm_dp_read_dpcd_caps(dp_dev->aux, dp_dev->dpcd);
+ if (ret)
+ return false;
+
+ dp_dev->is_branch = drm_dp_is_branch(dp_dev->dpcd);
+
+ ret = drm_dp_read_desc(dp_dev->aux, &dp_dev->desc, dp_dev->is_branch);
+ if (ret)
+ return false;
+
+ ret = drm_dp_read_downstream_info(dp_dev->aux, dp_dev->dpcd, dp_dev->downstream_ports);
+ if (ret)
+ return false;
+
+ return true;
+}
+
static int hibmc_dp_detect(struct drm_connector *connector,
struct drm_modeset_acquire_ctx *ctx, bool force)
{
- mdelay(200);
+ struct hibmc_dp *dp = to_hibmc_dp(connector);
+ struct hibmc_dp_dev *dp_dev = dp->dp_dev;
+ int ret;
+
+ if (dp->irq_status) {
+ if (dp_dev->hpd_status != HIBMC_HPD_IN)
+ return connector_status_disconnected;
+ }
+
+ if (!hibmc_dp_get_dpcd(dp_dev))
+ return connector_status_disconnected;
+
+ if (!dp_dev->is_branch)
+ return connector_status_connected;
+
+ if (drm_dp_read_sink_count_cap(connector, dp_dev->dpcd, &dp_dev->desc) &&
+ dp_dev->downstream_ports[0] & DP_DS_PORT_HPD) {
+ ret = drm_dp_read_sink_count(dp_dev->aux);
+ if (ret > 0)
+ return connector_status_connected;
+ }
+
+ return connector_status_disconnected;
+}
+
+static int hibmc_dp_mode_valid(struct drm_connector *connector,
+ const struct drm_display_mode *mode,
+ struct drm_modeset_acquire_ctx *ctx,
+ enum drm_mode_status *status)
+{
+ struct hibmc_dp *dp = to_hibmc_dp(connector);
+ u64 cur_val, max_val;

- return drm_connector_helper_detect_from_ddc(connector, ctx, force);
+ /* check DP link BW */
+ cur_val = (u64)mode->clock * HIBMC_DP_BPP;
+ max_val = (u64)hibmc_dp_get_link_rate(dp) * DP_MODE_VALI_CAL * hibmc_dp_get_lanes(dp);
+
+ *status = cur_val > max_val ? MODE_CLOCK_HIGH : MODE_OK;
+
+ return 0;
}

static const struct drm_connector_helper_funcs hibmc_dp_conn_helper_funcs = {
.get_modes = hibmc_dp_connector_get_modes,
.detect_ctx = hibmc_dp_detect,
+ .mode_valid_ctx = hibmc_dp_mode_valid,
};

static int hibmc_dp_late_register(struct drm_connector *connector)
@@ -115,7 +176,7 @@ irqreturn_t hibmc_dp_hpd_isr(int irq, void *arg)
{
struct drm_device *dev = (struct drm_device *)arg;
struct hibmc_drm_private *priv = to_hibmc_drm_private(dev);
- int idx;
+ int idx, exp_status;

if (!drm_dev_enter(dev, &idx))
return -ENODEV;
@@ -123,12 +184,14 @@ irqreturn_t hibmc_dp_hpd_isr(int irq, void *arg)
if (priv->dp.irq_status & DP_MASKED_SINK_HPD_PLUG_INT) {
drm_dbg_dp(&priv->dev, "HPD IN isr occur!\n");
hibmc_dp_hpd_cfg(&priv->dp);
+ exp_status = HIBMC_HPD_IN;
} else {
drm_dbg_dp(&priv->dev, "HPD OUT isr occur!\n");
hibmc_dp_reset_link(&priv->dp);
+ exp_status = HIBMC_HPD_OUT;
}

- if (dev->registered)
+ if (hibmc_dp_check_hpd_status(&priv->dp, exp_status))
drm_connector_helper_hpd_irq_event(&priv->dp.connector);

drm_dev_exit(idx);
diff --git a/drivers/gpu/drm/i915/display/intel_acpi.c b/drivers/gpu/drm/i915/display/intel_acpi.c
index 1addd6288241..1e8b9d175698 100644
--- a/drivers/gpu/drm/i915/display/intel_acpi.c
+++ b/drivers/gpu/drm/i915/display/intel_acpi.c
@@ -96,6 +96,7 @@ static void intel_dsm_platform_mux_info(acpi_handle dhandle)

if (!pkg->package.count) {
DRM_DEBUG_DRIVER("no connection in _DSM\n");
+ ACPI_FREE(pkg);
return;
}

diff --git a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
index 963c0f669ee5..e67ed58aa3d8 100644
--- a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
@@ -77,7 +77,10 @@ static bool a2xx_me_init(struct msm_gpu *gpu)

/* Vertex and Pixel Shader Start Addresses in instructions
* (3 DWORDS per instruction) */
- OUT_RING(ring, 0x80000180);
+ if (adreno_is_a225(adreno_gpu))
+ OUT_RING(ring, 0x80000300);
+ else
+ OUT_RING(ring, 0x80000180);
/* Maximum Contexts */
OUT_RING(ring, 0x00000001);
/* Write Confirm Interval and The CP will wait the
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h
index 8f978b9c3452..2f8688224f34 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h
@@ -13,6 +13,7 @@ static const struct dpu_caps sc7280_dpu_caps = {
.has_dim_layer = true,
.has_idle_pc = true,
.max_linewidth = 2400,
+ .has_3d_merge = true,
.pixel_ram_size = DEFAULT_PIXEL_RAM_SIZE,
};

@@ -134,17 +135,24 @@ static const struct dpu_pingpong_cfg sc7280_pp[] = {
.name = "pingpong_2", .id = PINGPONG_2,
.base = 0x6b000, .len = 0,
.sblk = &sc7280_pp_sblk,
- .merge_3d = 0,
+ .merge_3d = MERGE_3D_1,
.intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 10),
}, {
.name = "pingpong_3", .id = PINGPONG_3,
.base = 0x6c000, .len = 0,
.sblk = &sc7280_pp_sblk,
- .merge_3d = 0,
+ .merge_3d = MERGE_3D_1,
.intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 11),
},
};

+static const struct dpu_merge_3d_cfg sc7280_merge_3d[] = {
+ {
+ .name = "merge_3d_1", .id = MERGE_3D_1,
+ .base = 0x4f000, .len = 0x8,
+ },
+};
+
/* NOTE: sc7280 only has one DSC hard slice encoder */
static const struct dpu_dsc_cfg sc7280_dsc[] = {
{
@@ -247,6 +255,8 @@ const struct dpu_mdss_cfg dpu_sc7280_cfg = {
.mixer = sc7280_lm,
.pingpong_count = ARRAY_SIZE(sc7280_pp),
.pingpong = sc7280_pp,
+ .merge_3d_count = ARRAY_SIZE(sc7280_merge_3d),
+ .merge_3d = sc7280_merge_3d,
.dsc_count = ARRAY_SIZE(sc7280_dsc),
.dsc = sc7280_dsc,
.wb_count = ARRAY_SIZE(sc7280_wb),
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index 258edaa18fc0..777eab5ad844 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -784,24 +784,24 @@ static void _dpu_encoder_update_vsync_source(struct dpu_encoder_virt *dpu_enc,
return;
}

+ vsync_cfg.vsync_source = disp_info->vsync_source;
+ vsync_cfg.frame_rate = drm_mode_vrefresh(&dpu_enc->base.crtc->state->adjusted_mode);
+
if (hw_mdptop->ops.setup_vsync_source) {
for (i = 0; i < dpu_enc->num_phys_encs; i++)
vsync_cfg.ppnumber[i] = dpu_enc->hw_pp[i]->idx;

vsync_cfg.pp_count = dpu_enc->num_phys_encs;
- vsync_cfg.frame_rate = drm_mode_vrefresh(&dpu_enc->base.crtc->state->adjusted_mode);
-
- vsync_cfg.vsync_source = disp_info->vsync_source;

hw_mdptop->ops.setup_vsync_source(hw_mdptop, &vsync_cfg);
+ }

- for (i = 0; i < dpu_enc->num_phys_encs; i++) {
- phys_enc = dpu_enc->phys_encs[i];
+ for (i = 0; i < dpu_enc->num_phys_encs; i++) {
+ phys_enc = dpu_enc->phys_encs[i];

- if (phys_enc->has_intf_te && phys_enc->hw_intf->ops.vsync_sel)
- phys_enc->hw_intf->ops.vsync_sel(phys_enc->hw_intf,
- vsync_cfg.vsync_source);
- }
+ if (phys_enc->has_intf_te && phys_enc->hw_intf->ops.vsync_sel)
+ phys_enc->hw_intf->ops.vsync_sel(phys_enc->hw_intf,
+ &vsync_cfg);
}
}

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c
index 0ec6d67c7c70..93db1484f606 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c
@@ -681,10 +681,11 @@ static int dpu_encoder_phys_cmd_wait_for_commit_done(
if (!dpu_encoder_phys_cmd_is_master(phys_enc))
return 0;

- if (phys_enc->hw_ctl->ops.is_started(phys_enc->hw_ctl))
- return dpu_encoder_phys_cmd_wait_for_tx_complete(phys_enc);
+ if (phys_enc->irq[INTR_IDX_CTL_START] &&
+ !phys_enc->hw_ctl->ops.is_started(phys_enc->hw_ctl))
+ return _dpu_encoder_phys_cmd_wait_for_ctl_start(phys_enc);

- return _dpu_encoder_phys_cmd_wait_for_ctl_start(phys_enc);
+ return dpu_encoder_phys_cmd_wait_for_tx_complete(phys_enc);
}

static void dpu_encoder_phys_cmd_handle_post_kickoff(
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c
index a80ac82a9625..7e620f590984 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c
@@ -67,6 +67,10 @@
#define INTF_MISR_CTRL 0x180
#define INTF_MISR_SIGNATURE 0x184

+#define INTF_WD_TIMER_0_CTL 0x230
+#define INTF_WD_TIMER_0_CTL2 0x234
+#define INTF_WD_TIMER_0_LOAD_VALUE 0x238
+
#define INTF_MUX 0x25C
#define INTF_STATUS 0x26C
#define INTF_AVR_CONTROL 0x270
@@ -475,7 +479,20 @@ static int dpu_hw_intf_get_vsync_info(struct dpu_hw_intf *intf,
}

static void dpu_hw_intf_vsync_sel(struct dpu_hw_intf *intf,
- enum dpu_vsync_source vsync_source)
+ struct dpu_vsync_source_cfg *cfg)
+{
+ struct dpu_hw_blk_reg_map *c;
+
+ if (!intf)
+ return;
+
+ c = &intf->hw;
+
+ DPU_REG_WRITE(c, INTF_TEAR_MDP_VSYNC_SEL, (cfg->vsync_source & 0xf));
+}
+
+static void dpu_hw_intf_vsync_sel_v8(struct dpu_hw_intf *intf,
+ struct dpu_vsync_source_cfg *cfg)
{
struct dpu_hw_blk_reg_map *c;

@@ -484,7 +501,30 @@ static void dpu_hw_intf_vsync_sel(struct dpu_hw_intf *intf,

c = &intf->hw;

- DPU_REG_WRITE(c, INTF_TEAR_MDP_VSYNC_SEL, (vsync_source & 0xf));
+ if (cfg->vsync_source >= DPU_VSYNC_SOURCE_WD_TIMER_4 &&
+ cfg->vsync_source <= DPU_VSYNC_SOURCE_WD_TIMER_1) {
+ pr_warn_once("DPU 8.x supports only GPIOs and timer0 as TE sources\n");
+ return;
+ }
+
+ if (cfg->vsync_source == DPU_VSYNC_SOURCE_WD_TIMER_0) {
+ u32 reg;
+
+ DPU_REG_WRITE(c, INTF_WD_TIMER_0_LOAD_VALUE,
+ CALCULATE_WD_LOAD_VALUE(cfg->frame_rate));
+
+ DPU_REG_WRITE(c, INTF_WD_TIMER_0_CTL, BIT(0)); /* clear timer */
+
+ reg = BIT(8); /* enable heartbeat timer */
+ reg |= BIT(0); /* enable WD timer */
+ reg |= BIT(1); /* select default 16 clock ticks */
+ DPU_REG_WRITE(c, INTF_WD_TIMER_0_CTL2, reg);
+
+ /* make sure that timers are enabled/disabled for vsync state */
+ wmb();
+ }
+
+ dpu_hw_intf_vsync_sel(intf, cfg);
}

static void dpu_hw_intf_disable_autorefresh(struct dpu_hw_intf *intf,
@@ -598,7 +638,10 @@ struct dpu_hw_intf *dpu_hw_intf_init(struct drm_device *dev,
c->ops.enable_tearcheck = dpu_hw_intf_enable_te;
c->ops.disable_tearcheck = dpu_hw_intf_disable_te;
c->ops.connect_external_te = dpu_hw_intf_connect_external_te;
- c->ops.vsync_sel = dpu_hw_intf_vsync_sel;
+ if (mdss_rev->core_major_ver >= 8)
+ c->ops.vsync_sel = dpu_hw_intf_vsync_sel_v8;
+ else
+ c->ops.vsync_sel = dpu_hw_intf_vsync_sel;
c->ops.disable_autorefresh = dpu_hw_intf_disable_autorefresh;
}

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h
index f31067a9aaf1..e84ab849d71a 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h
@@ -12,6 +12,7 @@
#include "dpu_hw_util.h"

struct dpu_hw_intf;
+struct dpu_vsync_source_cfg;

/* intf timing settings */
struct dpu_hw_intf_timing_params {
@@ -107,7 +108,7 @@ struct dpu_hw_intf_ops {

int (*connect_external_te)(struct dpu_hw_intf *intf, bool enable_external_te);

- void (*vsync_sel)(struct dpu_hw_intf *intf, enum dpu_vsync_source vsync_source);
+ void (*vsync_sel)(struct dpu_hw_intf *intf, struct dpu_vsync_source_cfg *cfg);

/**
* Disable autorefresh if enabled
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c
index 6f1fc790ad6d..6ff4902fce08 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c
@@ -72,6 +72,8 @@
#define SSPP_EXCL_REC_XY_REC1 0x188
#define SSPP_EXCL_REC_SIZE 0x1B4
#define SSPP_EXCL_REC_XY 0x1B8
+#define SSPP_UBWC_STATIC_CTRL_REC1 0x1c0
+#define SSPP_UBWC_ERROR_STATUS_REC1 0x1c8
#define SSPP_CLK_CTRL 0x330

/* SSPP_SRC_OP_MODE & OP_MODE_REC1 */
@@ -215,7 +217,7 @@ static void dpu_hw_sspp_setup_format(struct dpu_sw_pipe *pipe,
u32 chroma_samp, unpack, src_format;
u32 opmode = 0;
u32 fast_clear = 0;
- u32 op_mode_off, unpack_pat_off, format_off;
+ u32 op_mode_off, unpack_pat_off, format_off, ubwc_ctrl_off, ubwc_error_off;

if (!ctx || !fmt)
return;
@@ -225,10 +227,21 @@ static void dpu_hw_sspp_setup_format(struct dpu_sw_pipe *pipe,
op_mode_off = SSPP_SRC_OP_MODE;
unpack_pat_off = SSPP_SRC_UNPACK_PATTERN;
format_off = SSPP_SRC_FORMAT;
+ ubwc_ctrl_off = SSPP_UBWC_STATIC_CTRL;
+ ubwc_error_off = SSPP_UBWC_ERROR_STATUS;
} else {
op_mode_off = SSPP_SRC_OP_MODE_REC1;
unpack_pat_off = SSPP_SRC_UNPACK_PATTERN_REC1;
format_off = SSPP_SRC_FORMAT_REC1;
+
+ /* reg wasn't present before DPU 8.0 */
+ if (ctx->mdss_ver->core_major_ver >= 8) {
+ ubwc_ctrl_off = SSPP_UBWC_STATIC_CTRL_REC1;
+ ubwc_error_off = SSPP_UBWC_ERROR_STATUS_REC1;
+ } else {
+ ubwc_ctrl_off = SSPP_UBWC_STATIC_CTRL;
+ ubwc_error_off = SSPP_UBWC_ERROR_STATUS;
+ }
}

c = &ctx->hw;
@@ -270,33 +283,35 @@ static void dpu_hw_sspp_setup_format(struct dpu_sw_pipe *pipe,
((fmt->bpp - 1) << 9);

if (fmt->fetch_mode != MDP_FETCH_LINEAR) {
+ u32 hbb = ctx->ubwc->highest_bank_bit - 13;
+
if (MSM_FORMAT_IS_UBWC(fmt))
opmode |= MDSS_MDP_OP_BWC_EN;
src_format |= (fmt->fetch_mode & 3) << 30; /*FRAME_FORMAT */
DPU_REG_WRITE(c, SSPP_FETCH_CONFIG,
DPU_FETCH_CONFIG_RESET_VALUE |
- ctx->ubwc->highest_bank_bit << 18);
+ hbb << 18);
switch (ctx->ubwc->ubwc_enc_version) {
case UBWC_1_0:
fast_clear = fmt->alpha_enable ? BIT(31) : 0;
- DPU_REG_WRITE(c, SSPP_UBWC_STATIC_CTRL,
+ DPU_REG_WRITE(c, ubwc_ctrl_off,
fast_clear | (ctx->ubwc->ubwc_swizzle & 0x1) |
BIT(8) |
- (ctx->ubwc->highest_bank_bit << 4));
+ (hbb << 4));
break;
case UBWC_2_0:
fast_clear = fmt->alpha_enable ? BIT(31) : 0;
- DPU_REG_WRITE(c, SSPP_UBWC_STATIC_CTRL,
+ DPU_REG_WRITE(c, ubwc_ctrl_off,
fast_clear | (ctx->ubwc->ubwc_swizzle) |
- (ctx->ubwc->highest_bank_bit << 4));
+ (hbb << 4));
break;
case UBWC_3_0:
- DPU_REG_WRITE(c, SSPP_UBWC_STATIC_CTRL,
+ DPU_REG_WRITE(c, ubwc_ctrl_off,
BIT(30) | (ctx->ubwc->ubwc_swizzle) |
- (ctx->ubwc->highest_bank_bit << 4));
+ (hbb << 4));
break;
case UBWC_4_0:
- DPU_REG_WRITE(c, SSPP_UBWC_STATIC_CTRL,
+ DPU_REG_WRITE(c, ubwc_ctrl_off,
MSM_FORMAT_IS_YUV(fmt) ? 0 : BIT(30));
break;
}
@@ -325,7 +340,7 @@ static void dpu_hw_sspp_setup_format(struct dpu_sw_pipe *pipe,
DPU_REG_WRITE(c, op_mode_off, opmode);

/* clear previous UBWC error */
- DPU_REG_WRITE(c, SSPP_UBWC_ERROR_STATUS, BIT(31));
+ DPU_REG_WRITE(c, ubwc_error_off, BIT(31));
}

static void dpu_hw_sspp_setup_pe_config(struct dpu_hw_sspp *ctx,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c
index 96dc10589bee..1ebd75d4f9be 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c
@@ -22,13 +22,6 @@
#define TRAFFIC_SHAPER_WR_CLIENT(num) (0x060 + (num * 4))
#define TRAFFIC_SHAPER_FIXPOINT_FACTOR 4

-#define MDP_TICK_COUNT 16
-#define XO_CLK_RATE 19200
-#define MS_TICKS_IN_SEC 1000
-
-#define CALCULATE_WD_LOAD_VALUE(fps) \
- ((uint32_t)((MS_TICKS_IN_SEC * XO_CLK_RATE)/(MDP_TICK_COUNT * fps)))
-
static void dpu_hw_setup_split_pipe(struct dpu_hw_mdp *mdp,
struct split_pipe_cfg *cfg)
{
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
index 67b08e99335d..6fe65bc3bff4 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
@@ -21,6 +21,13 @@

#define TO_S15D16(_x_)((_x_) << 7)

+#define MDP_TICK_COUNT 16
+#define XO_CLK_RATE 19200
+#define MS_TICKS_IN_SEC 1000
+
+#define CALCULATE_WD_LOAD_VALUE(fps) \
+ ((uint32_t)((MS_TICKS_IN_SEC * XO_CLK_RATE)/(MDP_TICK_COUNT * fps)))
+
extern const struct dpu_csc_cfg dpu_csc_YUV2RGB_601L;
extern const struct dpu_csc_cfg dpu_csc10_YUV2RGB_601L;
extern const struct dpu_csc_cfg dpu_csc10_rgb2yuv_601l;
diff --git a/drivers/gpu/drm/msm/disp/mdp_format.c b/drivers/gpu/drm/msm/disp/mdp_format.c
index 426782d50cb4..eebedb1a2636 100644
--- a/drivers/gpu/drm/msm/disp/mdp_format.c
+++ b/drivers/gpu/drm/msm/disp/mdp_format.c
@@ -479,25 +479,25 @@ static const struct msm_format mdp_formats[] = {
0, BPC8, BPC8, BPC8,
C2_R_Cr, C0_G_Y, C1_B_Cb, C0_G_Y,
false, CHROMA_H2V1, 4, 2, MSM_FORMAT_FLAG_YUV,
- MDP_FETCH_LINEAR, 2),
+ MDP_FETCH_LINEAR, 1),

INTERLEAVED_YUV_FMT(UYVY,
0, BPC8, BPC8, BPC8,
C1_B_Cb, C0_G_Y, C2_R_Cr, C0_G_Y,
false, CHROMA_H2V1, 4, 2, MSM_FORMAT_FLAG_YUV,
- MDP_FETCH_LINEAR, 2),
+ MDP_FETCH_LINEAR, 1),

INTERLEAVED_YUV_FMT(YUYV,
0, BPC8, BPC8, BPC8,
C0_G_Y, C1_B_Cb, C0_G_Y, C2_R_Cr,
false, CHROMA_H2V1, 4, 2, MSM_FORMAT_FLAG_YUV,
- MDP_FETCH_LINEAR, 2),
+ MDP_FETCH_LINEAR, 1),

INTERLEAVED_YUV_FMT(YVYU,
0, BPC8, BPC8, BPC8,
C0_G_Y, C2_R_Cr, C0_G_Y, C1_B_Cb,
false, CHROMA_H2V1, 4, 2, MSM_FORMAT_FLAG_YUV,
- MDP_FETCH_LINEAR, 2),
+ MDP_FETCH_LINEAR, 1),

/* 3 plane YUV */
PLANAR_YUV_FMT(YUV420,
diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c
index c42fd2c17a32..38ed4de8313e 100644
--- a/drivers/gpu/drm/msm/dp/dp_ctrl.c
+++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c
@@ -2395,20 +2395,32 @@ static void msm_dp_ctrl_config_msa(struct msm_dp_ctrl_private *ctrl,
bool is_ycbcr_420)
{
u32 pixel_m, pixel_n;
- u32 mvid, nvid, pixel_div = 0, dispcc_input_rate;
+ u32 mvid, nvid, pixel_div, dispcc_input_rate;
u32 const nvid_fixed = DP_LINK_CONSTANT_N_VALUE;
u32 const link_rate_hbr2 = 540000;
u32 const link_rate_hbr3 = 810000;
unsigned long den, num;

- if (rate == link_rate_hbr3)
+ switch (rate) {
+ case link_rate_hbr3:
pixel_div = 6;
- else if (rate == 162000 || rate == 270000)
- pixel_div = 2;
- else if (rate == link_rate_hbr2)
+ break;
+ case link_rate_hbr2:
pixel_div = 4;
- else
+ break;
+ case 162000:
+ case 270000:
+ pixel_div = 2;
+ break;
+ default:
+ /*
+ * This cannot be reached but the compiler is not able to know
+ * that statically so return early to avoid a possibly invalid
+ * division.
+ */
DRM_ERROR("Invalid pixel mux divider\n");
+ return;
+ }

dispcc_input_rate = (rate * 10) / pixel_div;

diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index d87d47cc7ec3..f247aad55397 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -133,8 +133,8 @@ struct msm_dp_desc {
static const struct msm_dp_desc msm_dp_desc_sa8775p[] = {
{ .io_start = 0x0af54000, .id = MSM_DP_CONTROLLER_0, .wide_bus_supported = true },
{ .io_start = 0x0af5c000, .id = MSM_DP_CONTROLLER_1, .wide_bus_supported = true },
- { .io_start = 0x22154000, .id = MSM_DP_CONTROLLER_2, .wide_bus_supported = true },
- { .io_start = 0x2215c000, .id = MSM_DP_CONTROLLER_3, .wide_bus_supported = true },
+ { .io_start = 0x22154000, .id = MSM_DP_CONTROLLER_0, .wide_bus_supported = true },
+ { .io_start = 0x2215c000, .id = MSM_DP_CONTROLLER_1, .wide_bus_supported = true },
{}
};

diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c
index fdefcbd9c284..a156c7e7cea8 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c
@@ -628,12 +628,7 @@ static int dsi_pll_14nm_postdiv_determine_rate(struct clk_hw *hw,

DBG("DSI%d PLL parent rate=%lu", pll_14nm->phy->id, req->rate);

- req->rate = divider_round_rate(hw, req->rate, &req->best_parent_rate,
- NULL,
- postdiv->width,
- postdiv->flags);
-
- return 0;
+ return divider_determine_rate(hw, req, NULL, postdiv->width, postdiv->flags);
}

static int dsi_pll_14nm_postdiv_set_rate(struct clk_hw *hw, unsigned long rate,
diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c
index 2d0e3e784c04..4dbb1b1d879f 100644
--- a/drivers/gpu/drm/msm/msm_mdss.c
+++ b/drivers/gpu/drm/msm/msm_mdss.c
@@ -229,7 +229,7 @@ static void msm_mdss_setup_ubwc_dec_50(struct msm_mdss *msm_mdss)
{
const struct qcom_ubwc_cfg_data *data = msm_mdss->mdss_data;
u32 value = MDSS_UBWC_STATIC_UBWC_SWIZZLE(data->ubwc_swizzle) |
- MDSS_UBWC_STATIC_HIGHEST_BANK_BIT(data->highest_bank_bit);
+ MDSS_UBWC_STATIC_HIGHEST_BANK_BIT(data->highest_bank_bit - 13);

if (data->ubwc_bank_spread)
value |= MDSS_UBWC_STATIC_UBWC_BANK_SPREAD;
diff --git a/drivers/gpu/drm/panel/panel-lg-sw43408.c b/drivers/gpu/drm/panel/panel-lg-sw43408.c
index 46a56ea92ad9..6e307fba658f 100644
--- a/drivers/gpu/drm/panel/panel-lg-sw43408.c
+++ b/drivers/gpu/drm/panel/panel-lg-sw43408.c
@@ -294,10 +294,6 @@ static void sw43408_remove(struct mipi_dsi_device *dsi)
struct sw43408_panel *ctx = mipi_dsi_get_drvdata(dsi);
int ret;

- ret = sw43408_unprepare(&ctx->base);
- if (ret < 0)
- dev_err(&dsi->dev, "failed to unprepare panel: %d\n", ret);
-
ret = mipi_dsi_detach(dsi);
if (ret < 0)
dev_err(&dsi->dev, "failed to detach from DSI host: %d\n", ret);
diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c
index db69449a5be0..77d0f4ced120 100644
--- a/drivers/gpu/drm/panthor/panthor_gpu.c
+++ b/drivers/gpu/drm/panthor/panthor_gpu.c
@@ -49,7 +49,7 @@ struct panthor_gpu {
static void panthor_gpu_coherency_set(struct panthor_device *ptdev)
{
gpu_write(ptdev, GPU_COHERENCY_PROTOCOL,
- ptdev->coherent ? GPU_COHERENCY_PROT_BIT(ACE_LITE) : GPU_COHERENCY_NONE);
+ ptdev->coherent ? GPU_COHERENCY_ACE_LITE : GPU_COHERENCY_NONE);
}

static void panthor_gpu_irq_handler(struct panthor_device *ptdev, u32 status)
@@ -259,38 +259,42 @@ int panthor_gpu_l2_power_on(struct panthor_device *ptdev)
int panthor_gpu_flush_caches(struct panthor_device *ptdev,
u32 l2, u32 lsc, u32 other)
{
- bool timedout = false;
unsigned long flags;
+ int ret = 0;

/* Serialize cache flush operations. */
guard(mutex)(&ptdev->gpu->cache_flush_lock);

spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags);
- if (!drm_WARN_ON(&ptdev->base,
- ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED)) {
+ if (!(ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED)) {
ptdev->gpu->pending_reqs |= GPU_IRQ_CLEAN_CACHES_COMPLETED;
gpu_write(ptdev, GPU_CMD, GPU_FLUSH_CACHES(l2, lsc, other));
+ } else {
+ ret = -EIO;
}
spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags);

+ if (ret)
+ return ret;
+
if (!wait_event_timeout(ptdev->gpu->reqs_acked,
!(ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED),
msecs_to_jiffies(100))) {
spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags);
if ((ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED) != 0 &&
!(gpu_read(ptdev, GPU_INT_RAWSTAT) & GPU_IRQ_CLEAN_CACHES_COMPLETED))
- timedout = true;
+ ret = -ETIMEDOUT;
else
ptdev->gpu->pending_reqs &= ~GPU_IRQ_CLEAN_CACHES_COMPLETED;
spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags);
}

- if (timedout) {
+ if (ret) {
+ panthor_device_schedule_reset(ptdev);
drm_err(&ptdev->base, "Flush caches timeout");
- return -ETIMEDOUT;
}

- return 0;
+ return ret;
}

/**
@@ -330,6 +334,7 @@ int panthor_gpu_soft_reset(struct panthor_device *ptdev)
return -ETIMEDOUT;
}

+ ptdev->gpu->pending_reqs = 0;
return 0;
}

diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
index 15961629872e..0fd8ffec92dd 100644
--- a/drivers/gpu/drm/panthor/panthor_mmu.c
+++ b/drivers/gpu/drm/panthor/panthor_mmu.c
@@ -1561,6 +1561,10 @@ static void panthor_vm_destroy(struct panthor_vm *vm)

vm->destroyed = true;

+ /* Tell scheduler to stop all GPU work related to this VM */
+ if (refcount_read(&vm->as.active_cnt) > 0)
+ panthor_sched_prepare_for_vm_destruction(vm->ptdev);
+
mutex_lock(&vm->heaps.lock);
panthor_heap_pool_destroy(vm->heaps.pool);
vm->heaps.pool = NULL;
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index 881a07ffbabc..c7dd98936bd6 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -1902,10 +1902,10 @@ struct panthor_sched_tick_ctx {
struct list_head groups[PANTHOR_CSG_PRIORITY_COUNT];
u32 idle_group_count;
u32 group_count;
- enum panthor_csg_priority min_priority;
struct panthor_vm *vms[MAX_CS_PER_CSG];
u32 as_count;
bool immediate_tick;
+ bool stop_tick;
u32 csg_upd_failed_mask;
};

@@ -1970,17 +1970,21 @@ tick_ctx_pick_groups_from_list(const struct panthor_scheduler *sched,
if (!owned_by_tick_ctx)
group_get(group);

- list_move_tail(&group->run_node, &ctx->groups[group->priority]);
ctx->group_count++;
+
+ /* If we have more than one active group with the same priority,
+ * we need to keep ticking to rotate the CSG priority.
+ */
if (group_is_idle(group))
ctx->idle_group_count++;
+ else if (!list_empty(&ctx->groups[group->priority]))
+ ctx->stop_tick = false;
+
+ list_move_tail(&group->run_node, &ctx->groups[group->priority]);

if (i == ctx->as_count)
ctx->vms[ctx->as_count++] = group->vm;

- if (ctx->min_priority > group->priority)
- ctx->min_priority = group->priority;
-
if (tick_ctx_is_full(sched, ctx))
return;
}
@@ -1989,31 +1993,22 @@ tick_ctx_pick_groups_from_list(const struct panthor_scheduler *sched,
static void
tick_ctx_insert_old_group(struct panthor_scheduler *sched,
struct panthor_sched_tick_ctx *ctx,
- struct panthor_group *group,
- bool full_tick)
+ struct panthor_group *group)
{
struct panthor_csg_slot *csg_slot = &sched->csg_slots[group->csg_id];
struct panthor_group *other_group;

- if (!full_tick) {
- list_add_tail(&group->run_node, &ctx->old_groups[group->priority]);
- return;
- }
-
- /* Rotate to make sure groups with lower CSG slot
- * priorities have a chance to get a higher CSG slot
- * priority next time they get picked. This priority
- * has an impact on resource request ordering, so it's
- * important to make sure we don't let one group starve
- * all other groups with the same group priority.
- */
+ /* Class groups in descending priority order so we can easily rotate. */
list_for_each_entry(other_group,
&ctx->old_groups[csg_slot->group->priority],
run_node) {
struct panthor_csg_slot *other_csg_slot = &sched->csg_slots[other_group->csg_id];

- if (other_csg_slot->priority > csg_slot->priority) {
- list_add_tail(&csg_slot->group->run_node, &other_group->run_node);
+ /* Our group has a higher prio than the one we're testing against,
+ * place it just before.
+ */
+ if (csg_slot->priority > other_csg_slot->priority) {
+ list_add_tail(&group->run_node, &other_group->run_node);
return;
}
}
@@ -2023,8 +2018,7 @@ tick_ctx_insert_old_group(struct panthor_scheduler *sched,

static void
tick_ctx_init(struct panthor_scheduler *sched,
- struct panthor_sched_tick_ctx *ctx,
- bool full_tick)
+ struct panthor_sched_tick_ctx *ctx)
{
struct panthor_device *ptdev = sched->ptdev;
struct panthor_csg_slots_upd_ctx upd_ctx;
@@ -2034,7 +2028,7 @@ tick_ctx_init(struct panthor_scheduler *sched,
memset(ctx, 0, sizeof(*ctx));
csgs_upd_ctx_init(&upd_ctx);

- ctx->min_priority = PANTHOR_CSG_PRIORITY_COUNT;
+ ctx->stop_tick = true;
for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
INIT_LIST_HEAD(&ctx->groups[i]);
INIT_LIST_HEAD(&ctx->old_groups[i]);
@@ -2062,7 +2056,7 @@ tick_ctx_init(struct panthor_scheduler *sched,
group->fatal_queues |= GENMASK(group->queue_count - 1, 0);
}

- tick_ctx_insert_old_group(sched, ctx, group, full_tick);
+ tick_ctx_insert_old_group(sched, ctx, group);
csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, i,
csg_iface->output->ack ^ CSG_STATUS_UPDATE,
CSG_STATUS_UPDATE);
@@ -2346,32 +2340,18 @@ static u64
tick_ctx_update_resched_target(struct panthor_scheduler *sched,
const struct panthor_sched_tick_ctx *ctx)
{
- /* We had space left, no need to reschedule until some external event happens. */
- if (!tick_ctx_is_full(sched, ctx))
- goto no_tick;
-
- /* If idle groups were scheduled, no need to wake up until some external
- * event happens (group unblocked, new job submitted, ...).
- */
- if (ctx->idle_group_count)
- goto no_tick;
+ u64 resched_target;

- if (drm_WARN_ON(&sched->ptdev->base, ctx->min_priority >= PANTHOR_CSG_PRIORITY_COUNT))
+ if (ctx->stop_tick)
goto no_tick;

- /* If there are groups of the same priority waiting, we need to
- * keep the scheduler ticking, otherwise, we'll just wait for
- * new groups with higher priority to be queued.
- */
- if (!list_empty(&sched->groups.runnable[ctx->min_priority])) {
- u64 resched_target = sched->last_tick + sched->tick_period;
+ resched_target = sched->last_tick + sched->tick_period;

- if (time_before64(sched->resched_target, sched->last_tick) ||
- time_before64(resched_target, sched->resched_target))
- sched->resched_target = resched_target;
+ if (time_before64(sched->resched_target, sched->last_tick) ||
+ time_before64(resched_target, sched->resched_target))
+ sched->resched_target = resched_target;

- return sched->resched_target - sched->last_tick;
- }
+ return sched->resched_target - sched->last_tick;

no_tick:
sched->resched_target = U64_MAX;
@@ -2384,9 +2364,11 @@ static void tick_work(struct work_struct *work)
tick_work.work);
struct panthor_device *ptdev = sched->ptdev;
struct panthor_sched_tick_ctx ctx;
+ u64 resched_target = sched->resched_target;
u64 remaining_jiffies = 0, resched_delay;
u64 now = get_jiffies_64();
int prio, ret, cookie;
+ bool full_tick;

if (!drm_dev_enter(&ptdev->base, &cookie))
return;
@@ -2395,18 +2377,24 @@ static void tick_work(struct work_struct *work)
if (drm_WARN_ON(&ptdev->base, ret))
goto out_dev_exit;

- if (time_before64(now, sched->resched_target))
- remaining_jiffies = sched->resched_target - now;
+ /* If the tick is stopped, calculate when the next tick would be */
+ if (resched_target == U64_MAX)
+ resched_target = sched->last_tick + sched->tick_period;
+
+ if (time_before64(now, resched_target))
+ remaining_jiffies = resched_target - now;
+
+ full_tick = remaining_jiffies == 0;

mutex_lock(&sched->lock);
if (panthor_device_reset_is_pending(sched->ptdev))
goto out_unlock;

- tick_ctx_init(sched, &ctx, remaining_jiffies != 0);
+ tick_ctx_init(sched, &ctx);
if (ctx.csg_upd_failed_mask)
goto out_cleanup_ctx;

- if (remaining_jiffies) {
+ if (!full_tick) {
/* Scheduling forced in the middle of a tick. Only RT groups
* can preempt non-RT ones. Currently running RT groups can't be
* preempted.
@@ -2428,9 +2416,29 @@ static void tick_work(struct work_struct *work)
for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1;
prio >= 0 && !tick_ctx_is_full(sched, &ctx);
prio--) {
+ struct panthor_group *old_highest_prio_group =
+ list_first_entry_or_null(&ctx.old_groups[prio],
+ struct panthor_group, run_node);
+
+ /* Pull out the group with the highest prio for rotation. */
+ if (old_highest_prio_group)
+ list_del(&old_highest_prio_group->run_node);
+
+ /* Re-insert old active groups so they get a chance to run with higher prio. */
+ tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], true, true);
+
+ /* Fill the remaining slots with runnable groups. */
tick_ctx_pick_groups_from_list(sched, &ctx, &sched->groups.runnable[prio],
true, false);
- tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], true, true);
+
+ /* Re-insert the old group with the highest prio, and give it a chance to be
+ * scheduled again (but with a lower prio) if there's room left.
+ */
+ if (old_highest_prio_group) {
+ list_add_tail(&old_highest_prio_group->run_node, &ctx.old_groups[prio]);
+ tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio],
+ true, true);
+ }
}

/* If we have free CSG slots left, pick idle groups */
@@ -2555,14 +2563,33 @@ static void sync_upd_work(struct work_struct *work)
sched_queue_delayed_work(sched, tick, 0);
}

+static void sched_resume_tick(struct panthor_device *ptdev)
+{
+ struct panthor_scheduler *sched = ptdev->scheduler;
+ u64 delay_jiffies, now;
+
+ drm_WARN_ON(&ptdev->base, sched->resched_target != U64_MAX);
+
+ /* Scheduler tick was off, recalculate the resched_target based on the
+ * last tick event, and queue the scheduler work.
+ */
+ now = get_jiffies_64();
+ sched->resched_target = sched->last_tick + sched->tick_period;
+ if (sched->used_csg_slot_count == sched->csg_slot_count &&
+ time_before64(now, sched->resched_target))
+ delay_jiffies = min_t(unsigned long, sched->resched_target - now, ULONG_MAX);
+ else
+ delay_jiffies = 0;
+
+ sched_queue_delayed_work(sched, tick, delay_jiffies);
+}
+
static void group_schedule_locked(struct panthor_group *group, u32 queue_mask)
{
struct panthor_device *ptdev = group->ptdev;
struct panthor_scheduler *sched = ptdev->scheduler;
struct list_head *queue = &sched->groups.runnable[group->priority];
- u64 delay_jiffies = 0;
bool was_idle;
- u64 now;

if (!group_can_run(group))
return;
@@ -2607,13 +2634,7 @@ static void group_schedule_locked(struct panthor_group *group, u32 queue_mask)
/* Scheduler tick was off, recalculate the resched_target based on the
* last tick event, and queue the scheduler work.
*/
- now = get_jiffies_64();
- sched->resched_target = sched->last_tick + sched->tick_period;
- if (sched->used_csg_slot_count == sched->csg_slot_count &&
- time_before64(now, sched->resched_target))
- delay_jiffies = min_t(unsigned long, sched->resched_target - now, ULONG_MAX);
-
- sched_queue_delayed_work(sched, tick, delay_jiffies);
+ sched_resume_tick(ptdev);
}

static void queue_stop(struct panthor_queue *queue,
@@ -2686,6 +2707,20 @@ void panthor_sched_report_mmu_fault(struct panthor_device *ptdev)
panthor_sched_immediate_tick(ptdev);
}

+void panthor_sched_prepare_for_vm_destruction(struct panthor_device *ptdev)
+{
+ /* FW can write out internal state, like the heap context, during CSG
+ * suspend. It is therefore important that the scheduler has fully
+ * evicted any pending and related groups before VM destruction can
+ * safely continue. Failure to do so can lead to GPU page faults.
+ * A controlled termination of a Panthor instance involves destroying
+ * the group(s) before the VM. This means any relevant group eviction
+ * has already been initiated by this point, and we just need to
+ * ensure that any pending tick_work() has been completed.
+ */
+ flush_work(&ptdev->scheduler->tick_work.work);
+}
+
void panthor_sched_resume(struct panthor_device *ptdev)
{
/* Force a tick to re-evaluate after a resume. */
@@ -3214,6 +3249,18 @@ queue_run_job(struct drm_sched_job *sched_job)

group_schedule_locked(group, BIT(job->queue_idx));
} else {
+ u32 queue_mask = BIT(job->queue_idx);
+ bool resume_tick = group_is_idle(group) &&
+ (group->idle_queues & queue_mask) &&
+ !(group->blocked_queues & queue_mask) &&
+ sched->resched_target == U64_MAX;
+
+ /* We just added something to the queue, so it's no longer idle. */
+ group->idle_queues &= ~queue_mask;
+
+ if (resume_tick)
+ sched_resume_tick(ptdev);
+
gpu_write(ptdev, CSF_DOORBELL(queue->doorbell_id), 1);
if (!sched->pm.has_ref &&
!(group->blocked_queues & BIT(job->queue_idx))) {
diff --git a/drivers/gpu/drm/panthor/panthor_sched.h b/drivers/gpu/drm/panthor/panthor_sched.h
index 742b0b4ff3a3..6a560ab0a5b3 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.h
+++ b/drivers/gpu/drm/panthor/panthor_sched.h
@@ -49,6 +49,7 @@ void panthor_sched_suspend(struct panthor_device *ptdev);
void panthor_sched_resume(struct panthor_device *ptdev);

void panthor_sched_report_mmu_fault(struct panthor_device *ptdev);
+void panthor_sched_prepare_for_vm_destruction(struct panthor_device *ptdev);
void panthor_sched_report_fw_events(struct panthor_device *ptdev, u32 events);

void panthor_fdinfo_gather_group_samples(struct panthor_file *pfile);
diff --git a/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c
index 9ac45e7bc987..409f1a1e82a0 100644
--- a/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c
@@ -267,12 +267,7 @@ static irqreturn_t dw_hdmi_qp_rk3576_hardirq(int irq, void *dev_id)
static irqreturn_t dw_hdmi_qp_rk3576_irq(int irq, void *dev_id)
{
struct rockchip_hdmi_qp *hdmi = dev_id;
- u32 intr_stat, val;
-
- regmap_read(hdmi->regmap, RK3576_IOC_HDMI_HPD_STATUS, &intr_stat);
-
- if (!intr_stat)
- return IRQ_NONE;
+ u32 val;

val = FIELD_PREP_WM16(RK3576_HDMI_HPD_INT_CLR, 1);
regmap_write(hdmi->regmap, RK3576_IOC_MISC_CON0, val);
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index e2e28ff73925..a270aef7c498 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1895,7 +1895,7 @@ static vm_fault_t xe_bo_cpu_fault(struct vm_fault *vmf)
int err = 0;
int idx;

- if (!drm_dev_enter(&xe->drm, &idx))
+ if (xe_device_wedged(xe) || !drm_dev_enter(&xe->drm, &idx))
return ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);

ret = xe_bo_cpu_fault_fastpath(vmf, xe, bo, needs_rpm);
diff --git a/drivers/gpu/drm/xe/xe_configfs.h b/drivers/gpu/drm/xe/xe_configfs.h
index c61e0e47ed94..08cce375ae0f 100644
--- a/drivers/gpu/drm/xe/xe_configfs.h
+++ b/drivers/gpu/drm/xe/xe_configfs.h
@@ -19,9 +19,11 @@ void xe_configfs_check_device(struct pci_dev *pdev);
bool xe_configfs_get_survivability_mode(struct pci_dev *pdev);
u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev);
bool xe_configfs_get_psmi_enabled(struct pci_dev *pdev);
-u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, enum xe_engine_class,
+u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev,
+ enum xe_engine_class class,
const u32 **cs);
-u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, enum xe_engine_class,
+u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev,
+ enum xe_engine_class class,
const u32 **cs);
#else
static inline int xe_configfs_init(void) { return 0; }
@@ -30,9 +32,11 @@ static inline void xe_configfs_check_device(struct pci_dev *pdev) { }
static inline bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) { return false; }
static inline u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev) { return U64_MAX; }
static inline bool xe_configfs_get_psmi_enabled(struct pci_dev *pdev) { return false; }
-static inline u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, enum xe_engine_class,
+static inline u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev,
+ enum xe_engine_class class,
const u32 **cs) { return 0; }
-static inline u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, enum xe_engine_class,
+static inline u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev,
+ enum xe_engine_class class,
const u32 **cs) { return 0; }
#endif

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index fe5aadb27b77..0d69cd0e4e79 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -966,6 +966,7 @@ int xe_device_probe(struct xe_device *xe)

err_unregister_display:
xe_display_unregister(xe);
+ drm_dev_unregister(&xe->drm);

return err;
}
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 53fdf59524c4..a3e9796e6430 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -1231,6 +1231,36 @@ int xe_guc_pc_set_power_profile(struct xe_guc_pc *pc, const char *buf)
return ret;
}

+static int pc_action_set_dcc(struct xe_guc_pc *pc, bool enable)
+{
+ int ret;
+
+ ret = pc_action_set_param(pc,
+ SLPC_PARAM_TASK_ENABLE_DCC,
+ enable);
+ if (!ret)
+ return pc_action_set_param(pc,
+ SLPC_PARAM_TASK_DISABLE_DCC,
+ !enable);
+ else
+ return ret;
+}
+
+static int pc_modify_defaults(struct xe_guc_pc *pc)
+{
+ struct xe_device *xe = pc_to_xe(pc);
+ struct xe_gt *gt = pc_to_gt(pc);
+ int ret = 0;
+
+ if (xe->info.platform == XE_PANTHERLAKE) {
+ ret = pc_action_set_dcc(pc, false);
+ if (unlikely(ret))
+ xe_gt_err(gt, "Failed to modify DCC default: %pe\n", ERR_PTR(ret));
+ }
+
+ return ret;
+}
+
/**
* xe_guc_pc_start - Start GuC's Power Conservation component
* @pc: Xe_GuC_PC instance
@@ -1288,6 +1318,10 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
ktime_ms_delta(ktime_get(), earlier));
}

+ ret = pc_modify_defaults(pc);
+ if (ret)
+ return ret;
+
ret = pc_init_freqs(pc);
if (ret)
goto out;
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index ef6f3ea573a2..6752881af093 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -260,11 +260,11 @@ u64 xe_mmio_read64_2x32(struct xe_mmio *mmio, struct xe_reg reg)
struct xe_reg reg_udw = { .addr = reg.addr + 0x4 };
u32 ldw, udw, oldudw, retries;

- reg.addr = xe_mmio_adjusted_addr(mmio, reg.addr);
- reg_udw.addr = xe_mmio_adjusted_addr(mmio, reg_udw.addr);
-
- /* we shouldn't adjust just one register address */
- xe_tile_assert(mmio->tile, reg_udw.addr == reg.addr + 0x4);
+ /*
+ * The two dwords of a 64-bit register can never straddle the offset
+ * adjustment cutoff.
+ */
+ xe_tile_assert(mmio->tile, !in_range(mmio->adj_limit, reg.addr + 1, 7));

oldudw = xe_mmio_read32(mmio, reg_udw);
for (retries = 5; retries; --retries) {
diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h
index 5a3bfea8b7b4..b66849539270 100644
--- a/drivers/gpu/drm/xe/xe_module.h
+++ b/drivers/gpu/drm/xe/xe_module.h
@@ -12,7 +12,7 @@
struct xe_modparam {
bool force_execlist;
bool probe_display;
- u32 force_vram_bar_size;
+ int force_vram_bar_size;
int guc_log_level;
char *guc_firmware_path;
char *huc_firmware_path;
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index d209434fd7fc..2a2e9f2c0916 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -567,16 +567,6 @@ static const struct xe_rtp_entry_sr engine_was[] = {
FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS))
},
- { XE_RTP_NAME("14019988906"),
- XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002),
- FUNC(xe_rtp_match_first_render_or_compute)),
- XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD))
- },
- { XE_RTP_NAME("14019877138"),
- XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002),
- FUNC(xe_rtp_match_first_render_or_compute)),
- XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT))
- },
{ XE_RTP_NAME("14020338487"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002),
FUNC(xe_rtp_match_first_render_or_compute)),
@@ -873,6 +863,14 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS))
},
+ { XE_RTP_NAME("14019988906"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD))
+ },
+ { XE_RTP_NAME("14019877138"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT))
+ },
{ XE_RTP_NAME("14021490052"),
XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(FF_MODE,
diff --git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c
index e4dfcf26b04e..2ec6d4445e84 100644
--- a/drivers/hid/hid-playstation.c
+++ b/drivers/hid/hid-playstation.c
@@ -774,7 +774,9 @@ ps_gamepad_create(struct hid_device *hdev,
#if IS_ENABLED(CONFIG_PLAYSTATION_FF)
if (play_effect) {
input_set_capability(gamepad, EV_FF, FF_RUMBLE);
- input_ff_create_memless(gamepad, NULL, play_effect);
+ ret = input_ff_create_memless(gamepad, NULL, play_effect);
+ if (ret)
+ return ERR_PTR(ret);
}
#endif

diff --git a/drivers/hid/intel-ish-hid/ishtp/bus.c b/drivers/hid/intel-ish-hid/ishtp/bus.c
index c3915f3a060e..b890fbf97a75 100644
--- a/drivers/hid/intel-ish-hid/ishtp/bus.c
+++ b/drivers/hid/intel-ish-hid/ishtp/bus.c
@@ -730,7 +730,7 @@ void ishtp_bus_remove_all_clients(struct ishtp_device *ishtp_dev,
spin_lock_irqsave(&ishtp_dev->cl_list_lock, flags);
list_for_each_entry(cl, &ishtp_dev->cl_list, link) {
cl->state = ISHTP_CL_DISCONNECTED;
- if (warm_reset && cl->device->reference_count)
+ if (warm_reset && cl->device && cl->device->reference_count)
continue;

/*
diff --git a/drivers/hv/mshv_eventfd.c b/drivers/hv/mshv_eventfd.c
index 806674722868..05d643f54f45 100644
--- a/drivers/hv/mshv_eventfd.c
+++ b/drivers/hv/mshv_eventfd.c
@@ -87,8 +87,9 @@ static void mshv_irqfd_resampler_ack(struct mshv_irq_ack_notifier *mian)

idx = srcu_read_lock(&partition->pt_irq_srcu);

- hlist_for_each_entry_rcu(irqfd, &resampler->rsmplr_irqfd_list,
- irqfd_resampler_hnode) {
+ hlist_for_each_entry_srcu(irqfd, &resampler->rsmplr_irqfd_list,
+ irqfd_resampler_hnode,
+ srcu_read_lock_held(&partition->pt_irq_srcu)) {
if (hv_should_clear_interrupt(irqfd->irqfd_lapic_irq.lapic_control.interrupt_type))
hv_call_clear_virtual_interrupt(partition->pt_id);

diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 69591dc7bad2..3ab62277b6be 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -25,6 +25,7 @@
#include <linux/cpu.h>
#include <linux/sched/isolation.h>
#include <linux/sched/task_stack.h>
+#include <linux/smpboot.h>

#include <linux/delay.h>
#include <linux/panic_notifier.h>
@@ -1306,7 +1307,7 @@ static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
}
}

-static void vmbus_isr(void)
+static void __vmbus_isr(void)
{
struct hv_per_cpu_context *hv_cpu
= this_cpu_ptr(hv_context.cpu_context);
@@ -1330,6 +1331,53 @@ static void vmbus_isr(void)
add_interrupt_randomness(vmbus_interrupt);
}

+static DEFINE_PER_CPU(bool, vmbus_irq_pending);
+static DEFINE_PER_CPU(struct task_struct *, vmbus_irqd);
+
+static void vmbus_irqd_wake(void)
+{
+ struct task_struct *tsk = __this_cpu_read(vmbus_irqd);
+
+ __this_cpu_write(vmbus_irq_pending, true);
+ wake_up_process(tsk);
+}
+
+static void vmbus_irqd_setup(unsigned int cpu)
+{
+ sched_set_fifo(current);
+}
+
+static int vmbus_irqd_should_run(unsigned int cpu)
+{
+ return __this_cpu_read(vmbus_irq_pending);
+}
+
+static void run_vmbus_irqd(unsigned int cpu)
+{
+ __this_cpu_write(vmbus_irq_pending, false);
+ __vmbus_isr();
+}
+
+static bool vmbus_irq_initialized;
+
+static struct smp_hotplug_thread vmbus_irq_threads = {
+ .store = &vmbus_irqd,
+ .setup = vmbus_irqd_setup,
+ .thread_should_run = vmbus_irqd_should_run,
+ .thread_fn = run_vmbus_irqd,
+ .thread_comm = "vmbus_irq/%u",
+};
+
+static void vmbus_isr(void)
+{
+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ vmbus_irqd_wake();
+ } else {
+ lockdep_hardirq_threaded();
+ __vmbus_isr();
+ }
+}
+
static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id)
{
vmbus_isr();
@@ -1375,6 +1423,13 @@ static int vmbus_bus_init(void)
* the VMbus interrupt handler.
*/

+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && !vmbus_irq_initialized) {
+ ret = smpboot_register_percpu_thread(&vmbus_irq_threads);
+ if (ret)
+ goto err_kthread;
+ vmbus_irq_initialized = true;
+ }
+
if (vmbus_irq == -1) {
hv_setup_vmbus_handler(vmbus_isr);
} else {
@@ -1449,6 +1504,11 @@ static int vmbus_bus_init(void)
free_percpu(vmbus_evt);
}
err_setup:
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && vmbus_irq_initialized) {
+ smpboot_unregister_percpu_thread(&vmbus_irq_threads);
+ vmbus_irq_initialized = false;
+ }
+err_kthread:
bus_unregister(&hv_bus);
return ret;
}
@@ -2914,6 +2974,10 @@ static void __exit vmbus_exit(void)
free_percpu_irq(vmbus_irq, vmbus_evt);
free_percpu(vmbus_evt);
}
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && vmbus_irq_initialized) {
+ smpboot_unregister_percpu_thread(&vmbus_irq_threads);
+ vmbus_irq_initialized = false;
+ }
for_each_online_cpu(cpu) {
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
diff --git a/drivers/hwmon/ibmpex.c b/drivers/hwmon/ibmpex.c
index 129f3a9e8fe9..228c5f6c6f38 100644
--- a/drivers/hwmon/ibmpex.c
+++ b/drivers/hwmon/ibmpex.c
@@ -277,9 +277,6 @@ static ssize_t ibmpex_high_low_store(struct device *dev,
{
struct ibmpex_bmc_data *data = dev_get_drvdata(dev);

- if (!data)
- return -ENODEV;
-
ibmpex_reset_high_low_data(data);

return count;
@@ -511,9 +508,6 @@ static void ibmpex_bmc_delete(struct ibmpex_bmc_data *data)
{
int i, j;

- hwmon_device_unregister(data->hwmon_dev);
- dev_set_drvdata(data->bmc_device, NULL);
-
device_remove_file(data->bmc_device,
&sensor_dev_attr_reset_high_low.dev_attr);
device_remove_file(data->bmc_device, &dev_attr_name.attr);
@@ -527,7 +521,8 @@ static void ibmpex_bmc_delete(struct ibmpex_bmc_data *data)
}

list_del(&data->list);
-
+ dev_set_drvdata(data->bmc_device, NULL);
+ hwmon_device_unregister(data->hwmon_dev);
ipmi_destroy_user(data->user);
kfree(data->sensors);
kfree(data);
diff --git a/drivers/hwmon/pmbus/mpq8785.c b/drivers/hwmon/pmbus/mpq8785.c
index 1f56aaf4dde8..87bd039c77b9 100644
--- a/drivers/hwmon/pmbus/mpq8785.c
+++ b/drivers/hwmon/pmbus/mpq8785.c
@@ -47,6 +47,33 @@ static int mpq8785_identify(struct i2c_client *client,
return 0;
};

+static int mpq8785_read_byte_data(struct i2c_client *client, int page, int reg)
+{
+ int ret;
+
+ switch (reg) {
+ case PMBUS_VOUT_MODE:
+ ret = pmbus_read_byte_data(client, page, reg);
+ if (ret < 0)
+ return ret;
+
+ if ((ret >> 5) == 1) {
+ /*
+ * The MPQ8785 chip reports VOUT_MODE as VID mode, but the driver
+ * treats VID as direct mode. Without this, identification would fail
+ * due to mode mismatch.
+ * This override ensures the reported mode matches the driver
+ * configuration, allowing successful initialization.
+ */
+ return PB_VOUT_MODE_DIRECT;
+ }
+
+ return ret;
+ default:
+ return -ENODATA;
+ }
+}
+
static int mpm82504_read_word_data(struct i2c_client *client, int page,
int phase, int reg)
{
@@ -129,6 +156,7 @@ static int mpq8785_probe(struct i2c_client *client)
break;
case mpq8785:
info->identify = mpq8785_identify;
+ info->read_byte_data = mpq8785_read_byte_data;
break;
default:
return -ENODEV;
diff --git a/drivers/hwspinlock/omap_hwspinlock.c b/drivers/hwspinlock/omap_hwspinlock.c
index 27b47b8623c0..2d8de835bc24 100644
--- a/drivers/hwspinlock/omap_hwspinlock.c
+++ b/drivers/hwspinlock/omap_hwspinlock.c
@@ -88,7 +88,9 @@ static int omap_hwspinlock_probe(struct platform_device *pdev)
* make sure the module is enabled and clocked before reading
* the module SYSSTATUS register
*/
- devm_pm_runtime_enable(&pdev->dev);
+ ret = devm_pm_runtime_enable(&pdev->dev);
+ if (ret)
+ return ret;
ret = pm_runtime_resume_and_get(&pdev->dev);
if (ret < 0)
return ret;
diff --git a/drivers/hwtracing/coresight/coresight-etm3x-core.c b/drivers/hwtracing/coresight/coresight-etm3x-core.c
index a5e809589d3e..0c011b704169 100644
--- a/drivers/hwtracing/coresight/coresight-etm3x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm3x-core.c
@@ -795,16 +795,16 @@ static int __init etm_hp_setup(void)
{
int ret;

- ret = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ARM_CORESIGHT_STARTING,
- "arm/coresight:starting",
- etm_starting_cpu, etm_dying_cpu);
+ ret = cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING,
+ "arm/coresight:starting",
+ etm_starting_cpu, etm_dying_cpu);

if (ret)
return ret;

- ret = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN,
- "arm/coresight:online",
- etm_online_cpu, NULL);
+ ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+ "arm/coresight:online",
+ etm_online_cpu, NULL);

/* HP dyn state ID returned in ret on success */
if (ret > 0) {
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
index 60b0e0a6da05..9144b273d415 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
@@ -1306,6 +1306,19 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev)

raw_spin_lock_irqsave(&drvdata->spinlock, flags);

+ /*
+ * Since the sysfs buffer allocation and the hardware enablement is not
+ * in the same critical region, it's possible to race with the perf.
+ */
+ if (coresight_get_mode(csdev) == CS_MODE_PERF) {
+ drvdata->sysfs_buf = NULL;
+ raw_spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+ /* Free allocated memory out side of the spinlock */
+ tmc_etr_free_sysfs_buf(sysfs_buf);
+ return -EBUSY;
+ }
+
/*
* In sysFS mode we can have multiple writers per sink. Since this
* sink is already enabled no memory is needed and the HW need not be
diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c
index 66513a27e6e7..425e36b36009 100644
--- a/drivers/i3c/master.c
+++ b/drivers/i3c/master.c
@@ -620,7 +620,8 @@ static int i3c_set_hotjoin(struct i3c_master_controller *master, bool enable)
else
ret = master->ops->disable_hotjoin(master);

- master->hotjoin = enable;
+ if (!ret)
+ master->hotjoin = enable;

i3c_bus_normaluse_unlock(&master->bus);

@@ -2884,7 +2885,6 @@ int i3c_master_register(struct i3c_master_controller *master,
INIT_LIST_HEAD(&master->boardinfo.i3c);

device_initialize(&master->dev);
- dev_set_name(&master->dev, "i3c-%d", i3cbus->id);

master->dev.dma_mask = parent->dma_mask;
master->dev.coherent_dma_mask = parent->coherent_dma_mask;
@@ -2894,6 +2894,8 @@ int i3c_master_register(struct i3c_master_controller *master,
if (ret)
goto err_put_dev;

+ dev_set_name(&master->dev, "i3c-%d", i3cbus->id);
+
ret = of_populate_i3c_bus(master);
if (ret)
goto err_put_dev;
diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c
index 9ceedf09c3b6..c06595cb7401 100644
--- a/drivers/i3c/master/dw-i3c-master.c
+++ b/drivers/i3c/master/dw-i3c-master.c
@@ -1094,6 +1094,7 @@ static int dw_i3c_master_i2c_xfers(struct i2c_dev_desc *dev,
dev_err(master->dev,
"<%s> cannot resume i3c bus master, err: %d\n",
__func__, ret);
+ dw_i3c_master_free_xfer(xfer);
return ret;
}

@@ -1563,6 +1564,8 @@ int dw_i3c_common_probe(struct dw_i3c_master *master,
spin_lock_init(&master->xferqueue.lock);
INIT_LIST_HEAD(&master->xferqueue.list);

+ spin_lock_init(&master->devs_lock);
+
writel(INTR_ALL, master->regs + INTR_STATUS);
irq = platform_get_irq(pdev, 0);
ret = devm_request_irq(&pdev->dev, irq,
diff --git a/drivers/iio/accel/sca3000.c b/drivers/iio/accel/sca3000.c
index bfa8a3f5a92f..9ef4d6e27466 100644
--- a/drivers/iio/accel/sca3000.c
+++ b/drivers/iio/accel/sca3000.c
@@ -1489,7 +1489,11 @@ static int sca3000_probe(struct spi_device *spi)
if (ret)
goto error_free_irq;

- return iio_device_register(indio_dev);
+ ret = iio_device_register(indio_dev);
+ if (ret)
+ goto error_free_irq;
+
+ return 0;

error_free_irq:
if (spi->irq)
diff --git a/drivers/iio/gyro/mpu3050-core.c b/drivers/iio/gyro/mpu3050-core.c
index 67ae7d1012bc..ee2fcd20545d 100644
--- a/drivers/iio/gyro/mpu3050-core.c
+++ b/drivers/iio/gyro/mpu3050-core.c
@@ -1162,10 +1162,8 @@ int mpu3050_common_probe(struct device *dev,
mpu3050->regs[1].supply = mpu3050_reg_vlogic;
ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(mpu3050->regs),
mpu3050->regs);
- if (ret) {
- dev_err(dev, "Cannot get regulators\n");
- return ret;
- }
+ if (ret)
+ return dev_err_probe(dev, ret, "Cannot get regulators\n");

ret = mpu3050_power_up(mpu3050);
if (ret)
diff --git a/drivers/iio/pressure/mprls0025pa.c b/drivers/iio/pressure/mprls0025pa.c
index 2336f2760eae..d4133fef91fa 100644
--- a/drivers/iio/pressure/mprls0025pa.c
+++ b/drivers/iio/pressure/mprls0025pa.c
@@ -59,7 +59,7 @@
*
* Values given to the userspace in sysfs interface:
* * raw - press_cnt
- * * offset - (-1 * outputmin) - pmin / scale
+ * * offset - (-1 * outputmin) + pmin / scale
* note: With all sensors from the datasheet pmin = 0
* which reduces the offset to (-1 * outputmin)
*/
@@ -160,8 +160,8 @@ static const struct iio_chan_spec mpr_channels[] = {
BIT(IIO_CHAN_INFO_OFFSET),
.scan_index = 0,
.scan_type = {
- .sign = 's',
- .realbits = 32,
+ .sign = 'u',
+ .realbits = 24,
.storagebits = 32,
.endianness = IIO_CPU,
},
@@ -313,8 +313,7 @@ static int mpr_read_raw(struct iio_dev *indio_dev,
return IIO_VAL_INT_PLUS_NANO;
case IIO_CHAN_INFO_OFFSET:
*val = data->offset;
- *val2 = data->offset2;
- return IIO_VAL_INT_PLUS_NANO;
+ return IIO_VAL_INT;
default:
return -EINVAL;
}
@@ -330,8 +329,9 @@ int mpr_common_probe(struct device *dev, const struct mpr_ops *ops, int irq)
struct mpr_data *data;
struct iio_dev *indio_dev;
const char *triplet;
- s64 scale, offset;
+ s64 odelta, pdelta;
u32 func;
+ s32 tmp;

indio_dev = devm_iio_device_alloc(dev, sizeof(*data));
if (!indio_dev)
@@ -405,23 +405,17 @@ int mpr_common_probe(struct device *dev, const struct mpr_ops *ops, int irq)
data->outmin = mpr_func_spec[data->function].output_min;
data->outmax = mpr_func_spec[data->function].output_max;

- /* use 64 bit calculation for preserving a reasonable precision */
- scale = div_s64(((s64)(data->pmax - data->pmin)) * NANO,
- data->outmax - data->outmin);
- data->scale = div_s64_rem(scale, NANO, &data->scale2);
- /*
- * multiply with NANO before dividing by scale and later divide by NANO
- * again.
- */
- offset = ((-1LL) * (s64)data->outmin) * NANO -
- div_s64(div_s64((s64)data->pmin * NANO, scale), NANO);
- data->offset = div_s64_rem(offset, NANO, &data->offset2);
+ odelta = data->outmax - data->outmin;
+ pdelta = data->pmax - data->pmin;
+
+ data->scale = div_s64_rem(div_s64(pdelta * NANO, odelta), NANO, &tmp);
+ data->scale2 = tmp;
+
+ data->offset = div_s64(odelta * data->pmin, pdelta) - data->outmin;

if (data->irq > 0) {
- ret = devm_request_irq(dev, data->irq, mpr_eoc_handler,
- IRQF_TRIGGER_RISING,
- dev_name(dev),
- data);
+ ret = devm_request_irq(dev, data->irq, mpr_eoc_handler, 0,
+ dev_name(dev), data);
if (ret)
return dev_err_probe(dev, ret,
"request irq %d failed\n", data->irq);
diff --git a/drivers/iio/pressure/mprls0025pa.h b/drivers/iio/pressure/mprls0025pa.h
index d62a018eaff3..b6944b305126 100644
--- a/drivers/iio/pressure/mprls0025pa.h
+++ b/drivers/iio/pressure/mprls0025pa.h
@@ -53,7 +53,6 @@ enum mpr_func_id {
* @scale: pressure scale
* @scale2: pressure scale, decimal number
* @offset: pressure offset
- * @offset2: pressure offset, decimal number
* @gpiod_reset: reset
* @irq: end of conversion irq. used to distinguish between irq mode and
* reading in a loop until data is ready
@@ -75,7 +74,6 @@ struct mpr_data {
int scale;
int scale2;
int offset;
- int offset2;
struct gpio_desc *gpiod_reset;
int irq;
struct completion completion;
diff --git a/drivers/iio/pressure/mprls0025pa_spi.c b/drivers/iio/pressure/mprls0025pa_spi.c
index d04102f8a4a0..cf17eb2e7208 100644
--- a/drivers/iio/pressure/mprls0025pa_spi.c
+++ b/drivers/iio/pressure/mprls0025pa_spi.c
@@ -8,6 +8,7 @@
* https://prod-edam.honeywell.com/content/dam/honeywell-edam/sps/siot/en-us/products/sensors/pressure-sensors/board-mount-pressure-sensors/micropressure-mpr-series/documents/sps-siot-mpr-series-datasheet-32332628-ciid-172626.pdf
*/

+#include <linux/array_size.h>
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/mod_devicetable.h>
@@ -40,17 +41,25 @@ static int mpr_spi_xfer(struct mpr_data *data, const u8 cmd, const u8 pkt_len)
{
struct spi_device *spi = to_spi_device(data->dev);
struct mpr_spi_buf *buf = spi_get_drvdata(spi);
- struct spi_transfer xfer;
+ struct spi_transfer xfers[2] = { };

if (pkt_len > MPR_MEASUREMENT_RD_SIZE)
return -EOVERFLOW;

buf->tx[0] = cmd;
- xfer.tx_buf = buf->tx;
- xfer.rx_buf = data->buffer;
- xfer.len = pkt_len;

- return spi_sync_transfer(spi, &xfer, 1);
+ /*
+ * Dummy transfer with no data, just cause a 2.5us+ delay between the CS assert
+ * and the first clock edge as per the datasheet tHDSS timing requirement.
+ */
+ xfers[0].delay.value = 2500;
+ xfers[0].delay.unit = SPI_DELAY_UNIT_NSECS;
+
+ xfers[1].tx_buf = buf->tx;
+ xfers[1].rx_buf = data->buffer;
+ xfers[1].len = pkt_len;
+
+ return spi_sync_transfer(spi, xfers, ARRAY_SIZE(xfers));
}

static const struct mpr_ops mpr_spi_ops = {
diff --git a/drivers/iio/test/Kconfig b/drivers/iio/test/Kconfig
index 6e65e929791c..4fc17dd0dcd7 100644
--- a/drivers/iio/test/Kconfig
+++ b/drivers/iio/test/Kconfig
@@ -8,7 +8,6 @@ config IIO_GTS_KUNIT_TEST
tristate "Test IIO gain-time-scale helpers" if !KUNIT_ALL_TESTS
depends on KUNIT
select IIO_GTS_HELPER
- select TEST_KUNIT_DEVICE_HELPERS
default KUNIT_ALL_TESTS
help
build unit tests for the IIO light sensor gain-time-scale helpers.
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 81cf3c902e81..0fc1c5bce2f0 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -1537,7 +1537,8 @@ static void ib_cache_event_task(struct work_struct *_work)
* the cache.
*/
ret = ib_cache_update(work->event.device, work->event.element.port_num,
- work->event.event == IB_EVENT_GID_CHANGE,
+ work->event.event == IB_EVENT_GID_CHANGE ||
+ work->event.event == IB_EVENT_CLIENT_REREGISTER,
work->event.event == IB_EVENT_PKEY_CHANGE,
work->enforce_security);

diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 62410578dec3..eb942ab9c405 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -95,7 +95,6 @@ static struct workqueue_struct *iwcm_wq;
struct iwcm_work {
struct work_struct work;
struct iwcm_id_private *cm_id;
- struct list_head list;
struct iw_cm_event event;
struct list_head free_list;
};
@@ -178,7 +177,6 @@ static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count)
return -ENOMEM;
}
work->cm_id = cm_id_priv;
- INIT_LIST_HEAD(&work->list);
put_work(work);
}
return 0;
@@ -213,7 +211,6 @@ static void free_cm_id(struct iwcm_id_private *cm_id_priv)
static bool iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
{
if (refcount_dec_and_test(&cm_id_priv->refcount)) {
- BUG_ON(!list_empty(&cm_id_priv->work_list));
free_cm_id(cm_id_priv);
return true;
}
@@ -260,7 +257,6 @@ struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
refcount_set(&cm_id_priv->refcount, 1);
init_waitqueue_head(&cm_id_priv->connect_wait);
init_completion(&cm_id_priv->destroy_comp);
- INIT_LIST_HEAD(&cm_id_priv->work_list);
INIT_LIST_HEAD(&cm_id_priv->work_free_list);

return &cm_id_priv->id;
@@ -1007,13 +1003,13 @@ static int process_event(struct iwcm_id_private *cm_id_priv,
}

/*
- * Process events on the work_list for the cm_id. If the callback
- * function requests that the cm_id be deleted, a flag is set in the
- * cm_id flags to indicate that when the last reference is
- * removed, the cm_id is to be destroyed. This is necessary to
- * distinguish between an object that will be destroyed by the app
- * thread asleep on the destroy_comp list vs. an object destroyed
- * here synchronously when the last reference is removed.
+ * Process events for the cm_id. If the callback function requests
+ * that the cm_id be deleted, a flag is set in the cm_id flags to
+ * indicate that when the last reference is removed, the cm_id is
+ * to be destroyed. This is necessary to distinguish between an
+ * object that will be destroyed by the app thread asleep on the
+ * destroy_comp list vs. an object destroyed here synchronously
+ * when the last reference is removed.
*/
static void cm_work_handler(struct work_struct *_work)
{
@@ -1024,35 +1020,26 @@ static void cm_work_handler(struct work_struct *_work)
int ret = 0;

spin_lock_irqsave(&cm_id_priv->lock, flags);
- while (!list_empty(&cm_id_priv->work_list)) {
- work = list_first_entry(&cm_id_priv->work_list,
- struct iwcm_work, list);
- list_del_init(&work->list);
- levent = work->event;
- put_work(work);
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-
- if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
- ret = process_event(cm_id_priv, &levent);
- if (ret) {
- destroy_cm_id(&cm_id_priv->id);
- WARN_ON_ONCE(iwcm_deref_id(cm_id_priv));
- }
- } else
- pr_debug("dropping event %d\n", levent.event);
- if (iwcm_deref_id(cm_id_priv))
- return;
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- }
+ levent = work->event;
+ put_work(work);
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+ if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
+ ret = process_event(cm_id_priv, &levent);
+ if (ret) {
+ destroy_cm_id(&cm_id_priv->id);
+ WARN_ON_ONCE(iwcm_deref_id(cm_id_priv));
+ }
+ } else
+ pr_debug("dropping event %d\n", levent.event);
+ if (iwcm_deref_id(cm_id_priv))
+ return;
}

/*
* This function is called on interrupt context. Schedule events on
* the iwcm_wq thread to allow callback functions to downcall into
- * the CM and/or block. Events are queued to a per-CM_ID
- * work_list. If this is the first event on the work_list, the work
- * element is also queued on the iwcm_wq thread.
+ * the CM and/or block.
*
* Each event holds a reference on the cm_id. Until the last posted
* event has been delivered and processed, the cm_id cannot be
@@ -1094,7 +1081,6 @@ static int cm_event_handler(struct iw_cm_id *cm_id,
}

refcount_inc(&cm_id_priv->refcount);
- list_add_tail(&work->list, &cm_id_priv->work_list);
queue_work(iwcm_wq, &work->work);
out:
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
diff --git a/drivers/infiniband/core/iwcm.h b/drivers/infiniband/core/iwcm.h
index bf74639be128..b56fb12edece 100644
--- a/drivers/infiniband/core/iwcm.h
+++ b/drivers/infiniband/core/iwcm.h
@@ -50,7 +50,6 @@ struct iwcm_id_private {
struct ib_qp *qp;
struct completion destroy_comp;
wait_queue_head_t connect_wait;
- struct list_head work_list;
spinlock_t lock;
refcount_t refcount;
struct list_head work_free_list;
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
index 6354ddf2a274..2522ff1cc462 100644
--- a/drivers/infiniband/core/rw.c
+++ b/drivers/infiniband/core/rw.c
@@ -651,34 +651,57 @@ unsigned int rdma_rw_mr_factor(struct ib_device *device, u32 port_num,
}
EXPORT_SYMBOL(rdma_rw_mr_factor);

+/**
+ * rdma_rw_max_send_wr - compute max Send WRs needed for RDMA R/W contexts
+ * @dev: RDMA device
+ * @port_num: port number
+ * @max_rdma_ctxs: number of rdma_rw_ctx structures
+ * @create_flags: QP create flags (pass IB_QP_CREATE_INTEGRITY_EN if
+ * data integrity will be enabled on the QP)
+ *
+ * Returns the total number of Send Queue entries needed for
+ * @max_rdma_ctxs. The result accounts for memory registration and
+ * invalidation work requests when the device requires them.
+ *
+ * ULPs use this to size Send Queues and Send CQs before creating a
+ * Queue Pair.
+ */
+unsigned int rdma_rw_max_send_wr(struct ib_device *dev, u32 port_num,
+ unsigned int max_rdma_ctxs, u32 create_flags)
+{
+ unsigned int factor = 1;
+ unsigned int result;
+
+ if (create_flags & IB_QP_CREATE_INTEGRITY_EN ||
+ rdma_rw_can_use_mr(dev, port_num))
+ factor += 2; /* reg + inv */
+
+ if (check_mul_overflow(factor, max_rdma_ctxs, &result))
+ return UINT_MAX;
+ return result;
+}
+EXPORT_SYMBOL(rdma_rw_max_send_wr);
+
void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr)
{
- u32 factor;
+ unsigned int factor = 1;

WARN_ON_ONCE(attr->port_num == 0);

/*
- * Each context needs at least one RDMA READ or WRITE WR.
- *
- * For some hardware we might need more, eventually we should ask the
- * HCA driver for a multiplier here.
- */
- factor = 1;
-
- /*
- * If the device needs MRs to perform RDMA READ or WRITE operations,
- * we'll need two additional MRs for the registrations and the
- * invalidation.
+ * If the device uses MRs to perform RDMA READ or WRITE operations,
+ * or if data integrity is enabled, account for registration and
+ * invalidation work requests.
*/
if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN ||
rdma_rw_can_use_mr(dev, attr->port_num))
- factor += 2; /* inv + reg */
+ factor += 2; /* reg + inv */

attr->cap.max_send_wr += factor * attr->cap.max_rdma_ctxs;

/*
- * But maybe we were just too high in the sky and the device doesn't
- * even support all we need, and we'll have to live with what we get..
+ * The device might not support all we need, and we'll have to
+ * live with what we get.
*/
attr->cap.max_send_wr =
min_t(u32, attr->cap.max_send_wr, dev->attrs.max_qp_wr);
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index fd67fc9fe85a..2f7e3c4483fc 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -514,7 +514,8 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
struct rdma_ah_attr ah_attr;
struct ib_ah *ah;
__be64 *tid;
- int ret, data_len, hdr_len, copy_offset, rmpp_active;
+ int ret, hdr_len, copy_offset, rmpp_active;
+ size_t data_len;
u8 base_version;

if (count < hdr_size(file) + IB_MGMT_RMPP_HDR)
@@ -588,7 +589,10 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
}

base_version = ((struct ib_mad_hdr *)&packet->mad.data)->base_version;
- data_len = count - hdr_size(file) - hdr_len;
+ if (check_sub_overflow(count, hdr_size(file) + hdr_len, &data_len)) {
+ ret = -EINVAL;
+ goto err_ah;
+ }
packet->msg = ib_create_send_mad(agent,
be32_to_cpu(packet->mad.hdr.qpn),
packet->mad.hdr.pkey_index, rmpp_active,
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index ce16404cdfb8..f4616deeca54 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -2049,7 +2049,10 @@ static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs)
if (ret)
return ret;

- user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL);
+ if (cmd.wqe_size < sizeof(struct ib_uverbs_send_wr))
+ return -EINVAL;
+
+ user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL | __GFP_NOWARN);
if (!user_wr)
return -ENOMEM;

@@ -2239,7 +2242,7 @@ ib_uverbs_unmarshall_recv(struct uverbs_req_iter *iter, u32 wr_count,
if (ret)
return ERR_PTR(ret);

- user_wr = kmalloc(wqe_size, GFP_KERNEL);
+ user_wr = kmalloc(wqe_size, GFP_KERNEL | __GFP_NOWARN);
if (!user_wr)
return ERR_PTR(-ENOMEM);

diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index 307c35888b30..3b6c6a6e9f97 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -61,7 +61,7 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
u8 tclass = get_tclass(grh);
u8 priority = 0;
u8 tc_mode = 0;
- int ret;
+ int ret = 0;

if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 && udata) {
ret = -EOPNOTSUPP;
@@ -78,19 +78,18 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
ah->av.flowlabel = grh->flow_label;
ah->av.udp_sport = get_ah_udp_sport(ah_attr);
ah->av.tclass = tclass;
+ ah->av.sl = rdma_ah_get_sl(ah_attr);

- ret = hr_dev->hw->get_dscp(hr_dev, tclass, &tc_mode, &priority);
- if (ret == -EOPNOTSUPP)
- ret = 0;
-
- if (ret && grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
- goto err_out;
+ if (grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
+ ret = hr_dev->hw->get_dscp(hr_dev, tclass, &tc_mode, &priority);
+ if (ret == -EOPNOTSUPP)
+ ret = 0;
+ else if (ret)
+ goto err_out;

- if (tc_mode == HNAE3_TC_MAP_MODE_DSCP &&
- grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
- ah->av.sl = priority;
- else
- ah->av.sl = rdma_ah_get_sl(ah_attr);
+ if (tc_mode == HNAE3_TC_MAP_MODE_DSCP)
+ ah->av.sl = priority;
+ }

if (!check_sl_valid(hr_dev, ah->av.sl)) {
ret = -EINVAL;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 63052c0e7613..f895731ad74a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -3661,6 +3661,23 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
HNS_ROCE_V2_CQ_DEFAULT_INTERVAL);
}

+static bool left_sw_wc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
+{
+ struct hns_roce_qp *hr_qp;
+
+ list_for_each_entry(hr_qp, &hr_cq->sq_list, sq_node) {
+ if (hr_qp->sq.head != hr_qp->sq.tail)
+ return true;
+ }
+
+ list_for_each_entry(hr_qp, &hr_cq->rq_list, rq_node) {
+ if (hr_qp->rq.head != hr_qp->rq.tail)
+ return true;
+ }
+
+ return false;
+}
+
static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq,
enum ib_cq_notify_flags flags)
{
@@ -3669,6 +3686,12 @@ static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq,
struct hns_roce_v2_db cq_db = {};
u32 notify_flag;

+ if (hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN) {
+ if ((flags & IB_CQ_REPORT_MISSED_EVENTS) &&
+ left_sw_wc(hr_dev, hr_cq))
+ return 1;
+ return 0;
+ }
/*
* flags = 0, then notify_flag : next
* flags = 1, then notify flag : solocited
@@ -4975,20 +4998,22 @@ static int hns_roce_set_sl(struct ib_qp *ibqp,
struct ib_device *ibdev = &hr_dev->ib_dev;
int ret;

- ret = hns_roce_hw_v2_get_dscp(hr_dev, get_tclass(&attr->ah_attr.grh),
- &hr_qp->tc_mode, &hr_qp->priority);
- if (ret && ret != -EOPNOTSUPP &&
- grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
- ibdev_err_ratelimited(ibdev,
- "failed to get dscp, ret = %d.\n", ret);
- return ret;
- }
+ hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);

- if (hr_qp->tc_mode == HNAE3_TC_MAP_MODE_DSCP &&
- grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
- hr_qp->sl = hr_qp->priority;
- else
- hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
+ if (grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
+ ret = hns_roce_hw_v2_get_dscp(hr_dev,
+ get_tclass(&attr->ah_attr.grh),
+ &hr_qp->tc_mode, &hr_qp->priority);
+ if (ret && ret != -EOPNOTSUPP) {
+ ibdev_err_ratelimited(ibdev,
+ "failed to get dscp, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ if (hr_qp->tc_mode == HNAE3_TC_MAP_MODE_DSCP)
+ hr_qp->sl = hr_qp->priority;
+ }

if (!check_sl_valid(hr_dev, hr_qp->sl))
return -EINVAL;
@@ -6878,7 +6903,8 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)

INIT_WORK(&hr_dev->ecc_work, fmea_ram_ecc_work);

- hr_dev->irq_workq = alloc_ordered_workqueue("hns_roce_irq_workq", 0);
+ hr_dev->irq_workq = alloc_ordered_workqueue("hns_roce_irq_workq",
+ WQ_MEM_RECLAIM);
if (!hr_dev->irq_workq) {
dev_err(dev, "failed to create irq workqueue.\n");
ret = -ENOMEM;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index fc1e86f6c409..5899bd5cb162 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -557,12 +557,20 @@ static int mlx5_query_port_roce(struct ib_device *device, u32 port_num,
* of an error it will still be zeroed out.
* Use native port in case of reps
*/
- if (dev->is_rep)
- err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN,
- 1, 0);
- else
- err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN,
- mdev_port_num, 0);
+ if (dev->is_rep) {
+ struct mlx5_eswitch_rep *rep;
+
+ rep = dev->port[port_num - 1].rep;
+ if (rep) {
+ mdev = mlx5_eswitch_get_core_dev(rep->esw);
+ WARN_ON(!mdev);
+ }
+ mdev_port_num = 1;
+ }
+
+ err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN,
+ mdev_port_num, 0);
+
if (err)
goto out;
ext = !!MLX5_GET_ETH_PROTO(ptys_reg, out, true, eth_proto_capability);
@@ -2874,7 +2882,6 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
container_of(_work, struct mlx5_ib_event_work, work);
struct mlx5_ib_dev *ibdev;
struct ib_event ibev;
- bool fatal = false;

if (work->is_slave) {
ibdev = mlx5_ib_get_ibdev_from_mpi(work->mpi);
@@ -2885,12 +2892,6 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
}

switch (work->event) {
- case MLX5_DEV_EVENT_SYS_ERROR:
- ibev.event = IB_EVENT_DEVICE_FATAL;
- mlx5_ib_handle_internal_error(ibdev);
- ibev.element.port_num = (u8)(unsigned long)work->param;
- fatal = true;
- break;
case MLX5_EVENT_TYPE_PORT_CHANGE:
if (handle_port_change(ibdev, work->param, &ibev))
goto out;
@@ -2912,8 +2913,6 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
if (ibdev->ib_active)
ib_dispatch_event(&ibev);

- if (fatal)
- ibdev->ib_active = false;
out:
kfree(work);
}
@@ -2957,6 +2956,66 @@ static int mlx5_ib_event_slave_port(struct notifier_block *nb,
return NOTIFY_OK;
}

+static void mlx5_ib_handle_sys_error_event(struct work_struct *_work)
+{
+ struct mlx5_ib_event_work *work =
+ container_of(_work, struct mlx5_ib_event_work, work);
+ struct mlx5_ib_dev *ibdev = work->dev;
+ struct ib_event ibev;
+
+ ibev.event = IB_EVENT_DEVICE_FATAL;
+ mlx5_ib_handle_internal_error(ibdev);
+ ibev.element.port_num = (u8)(unsigned long)work->param;
+ ibev.device = &ibdev->ib_dev;
+
+ if (!rdma_is_port_valid(&ibdev->ib_dev, ibev.element.port_num)) {
+ mlx5_ib_warn(ibdev, "warning: event on port %d\n", ibev.element.port_num);
+ goto out;
+ }
+
+ if (ibdev->ib_active)
+ ib_dispatch_event(&ibev);
+
+ ibdev->ib_active = false;
+out:
+ kfree(work);
+}
+
+static int mlx5_ib_sys_error_event(struct notifier_block *nb,
+ unsigned long event, void *param)
+{
+ struct mlx5_ib_event_work *work;
+
+ if (event != MLX5_DEV_EVENT_SYS_ERROR)
+ return NOTIFY_DONE;
+
+ work = kmalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return NOTIFY_DONE;
+
+ INIT_WORK(&work->work, mlx5_ib_handle_sys_error_event);
+ work->dev = container_of(nb, struct mlx5_ib_dev, sys_error_events);
+ work->is_slave = false;
+ work->param = param;
+ work->event = event;
+
+ queue_work(mlx5_ib_event_wq, &work->work);
+
+ return NOTIFY_OK;
+}
+
+static int mlx5_ib_stage_sys_error_notifier_init(struct mlx5_ib_dev *dev)
+{
+ dev->sys_error_events.notifier_call = mlx5_ib_sys_error_event;
+ mlx5_notifier_register(dev->mdev, &dev->sys_error_events);
+ return 0;
+}
+
+static void mlx5_ib_stage_sys_error_notifier_cleanup(struct mlx5_ib_dev *dev)
+{
+ mlx5_notifier_unregister(dev->mdev, &dev->sys_error_events);
+}
+
static int mlx5_ib_get_plane_num(struct mlx5_core_dev *mdev, u8 *num_plane)
{
struct mlx5_hca_vport_context vport_ctx;
@@ -4462,12 +4521,16 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_RDMA_CTRL) {
err = mlx5_ib_init_ucaps(dev);
if (err)
- return err;
+ goto err_ucaps;
}

dev->ib_dev.use_cq_dim = true;

return 0;
+
+err_ucaps:
+ bitmap_free(dev->var_table.bitmap);
+ return err;
}

static const struct ib_device_ops mlx5_ib_dev_port_ops = {
@@ -4803,6 +4866,9 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
mlx5_ib_devx_init,
mlx5_ib_devx_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_SYS_ERROR_NOTIFIER,
+ mlx5_ib_stage_sys_error_notifier_init,
+ mlx5_ib_stage_sys_error_notifier_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
@@ -4860,6 +4926,9 @@ const struct mlx5_ib_profile raw_eth_profile = {
STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
mlx5_ib_devx_init,
mlx5_ib_devx_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_SYS_ERROR_NOTIFIER,
+ mlx5_ib_stage_sys_error_notifier_init,
+ mlx5_ib_stage_sys_error_notifier_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 09d82d5f95e3..fbccb0362590 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1007,6 +1007,7 @@ enum mlx5_ib_stages {
MLX5_IB_STAGE_BFREG,
MLX5_IB_STAGE_PRE_IB_REG_UMR,
MLX5_IB_STAGE_WHITELIST_UID,
+ MLX5_IB_STAGE_SYS_ERROR_NOTIFIER,
MLX5_IB_STAGE_IB_REG,
MLX5_IB_STAGE_DEVICE_NOTIFIER,
MLX5_IB_STAGE_POST_IB_REG_UMR,
@@ -1165,6 +1166,7 @@ struct mlx5_ib_dev {
/* protect accessing data_direct_dev */
struct mutex data_direct_lock;
struct notifier_block mdev_events;
+ struct notifier_block sys_error_events;
struct notifier_block lag_events;
int num_ports;
/* serialize update of capability mask
diff --git a/drivers/infiniband/hw/mlx5/std_types.c b/drivers/infiniband/hw/mlx5/std_types.c
index 2fcf553044e1..1ee31611b4b3 100644
--- a/drivers/infiniband/hw/mlx5/std_types.c
+++ b/drivers/infiniband/hw/mlx5/std_types.c
@@ -195,7 +195,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_GET_DATA_DIRECT_SYSFS_PATH)(
int out_len = uverbs_attr_get_len(attrs,
MLX5_IB_ATTR_GET_DATA_DIRECT_SYSFS_PATH);
u32 dev_path_len;
- char *dev_path;
+ char *dev_path = NULL;
int ret;

c = to_mucontext(ib_uverbs_get_ucontext(attrs));
@@ -223,9 +223,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_GET_DATA_DIRECT_SYSFS_PATH)(

ret = uverbs_copy_to(attrs, MLX5_IB_ATTR_GET_DATA_DIRECT_SYSFS_PATH, dev_path,
dev_path_len);
- kfree(dev_path);

end:
+ kfree(dev_path);
mutex_unlock(&dev->data_direct_lock);
return ret;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index a5b2b62f596b..1390e861bd1d 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -119,12 +119,15 @@ void retransmit_timer(struct timer_list *t)

rxe_dbg_qp(qp, "retransmit timer fired\n");

+ if (!rxe_get(qp))
+ return;
spin_lock_irqsave(&qp->state_lock, flags);
if (qp->valid) {
qp->comp.timeout = 1;
rxe_sched_task(&qp->send_task);
}
spin_unlock_irqrestore(&qp->state_lock, flags);
+ rxe_put(qp);
}

void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index bcb97b3ea58a..2c486bb616a7 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -72,14 +72,46 @@ void rxe_mr_init_dma(int access, struct rxe_mr *mr)
mr->ibmr.type = IB_MR_TYPE_DMA;
}

+/*
+ * Convert iova to page_info index. The page_info stores pages of size
+ * PAGE_SIZE, but MRs can have different page sizes. This function
+ * handles the conversion for all cases:
+ *
+ * 1. mr->page_size > PAGE_SIZE:
+ * The MR's iova may not be aligned to mr->page_size. We use the
+ * aligned base (iova & page_mask) as reference, then calculate
+ * which PAGE_SIZE sub-page the iova falls into.
+ *
+ * 2. mr->page_size <= PAGE_SIZE:
+ * Use simple shift arithmetic since each page_info entry corresponds
+ * to one or more MR pages.
+ */
static unsigned long rxe_mr_iova_to_index(struct rxe_mr *mr, u64 iova)
{
- return (iova >> mr->page_shift) - (mr->ibmr.iova >> mr->page_shift);
+ int idx;
+
+ if (mr_page_size(mr) > PAGE_SIZE)
+ idx = (iova - (mr->ibmr.iova & mr->page_mask)) >> PAGE_SHIFT;
+ else
+ idx = (iova >> mr->page_shift) -
+ (mr->ibmr.iova >> mr->page_shift);
+
+ WARN_ON(idx >= mr->nbuf);
+ return idx;
}

+/*
+ * Convert iova to offset within the page_info entry.
+ *
+ * For mr_page_size > PAGE_SIZE, the offset is within the system page.
+ * For mr_page_size <= PAGE_SIZE, the offset is within the MR page size.
+ */
static unsigned long rxe_mr_iova_to_page_offset(struct rxe_mr *mr, u64 iova)
{
- return iova & (mr_page_size(mr) - 1);
+ if (mr_page_size(mr) > PAGE_SIZE)
+ return iova & (PAGE_SIZE - 1);
+ else
+ return iova & (mr_page_size(mr) - 1);
}

static bool is_pmem_page(struct page *pg)
@@ -93,37 +125,69 @@ static bool is_pmem_page(struct page *pg)

static int rxe_mr_fill_pages_from_sgt(struct rxe_mr *mr, struct sg_table *sgt)
{
- XA_STATE(xas, &mr->page_list, 0);
struct sg_page_iter sg_iter;
struct page *page;
bool persistent = !!(mr->access & IB_ACCESS_FLUSH_PERSISTENT);

+ WARN_ON(mr_page_size(mr) != PAGE_SIZE);
+
__sg_page_iter_start(&sg_iter, sgt->sgl, sgt->orig_nents, 0);
if (!__sg_page_iter_next(&sg_iter))
return 0;

- do {
- xas_lock(&xas);
- while (true) {
- page = sg_page_iter_page(&sg_iter);
-
- if (persistent && !is_pmem_page(page)) {
- rxe_dbg_mr(mr, "Page can't be persistent\n");
- xas_set_err(&xas, -EINVAL);
- break;
- }
+ while (true) {
+ page = sg_page_iter_page(&sg_iter);

- xas_store(&xas, page);
- if (xas_error(&xas))
- break;
- xas_next(&xas);
- if (!__sg_page_iter_next(&sg_iter))
- break;
+ if (persistent && !is_pmem_page(page)) {
+ rxe_dbg_mr(mr, "Page can't be persistent\n");
+ return -EINVAL;
}
- xas_unlock(&xas);
- } while (xas_nomem(&xas, GFP_KERNEL));

- return xas_error(&xas);
+ mr->page_info[mr->nbuf].page = page;
+ mr->page_info[mr->nbuf].offset = 0;
+ mr->nbuf++;
+
+ if (!__sg_page_iter_next(&sg_iter))
+ break;
+ }
+
+ return 0;
+}
+
+static int __alloc_mr_page_info(struct rxe_mr *mr, int num_pages)
+{
+ mr->page_info = kcalloc(num_pages, sizeof(struct rxe_mr_page),
+ GFP_KERNEL);
+ if (!mr->page_info)
+ return -ENOMEM;
+
+ mr->max_allowed_buf = num_pages;
+ mr->nbuf = 0;
+
+ return 0;
+}
+
+static int alloc_mr_page_info(struct rxe_mr *mr, int num_pages)
+{
+ int ret;
+
+ WARN_ON(mr->num_buf);
+ ret = __alloc_mr_page_info(mr, num_pages);
+ if (ret)
+ return ret;
+
+ mr->num_buf = num_pages;
+
+ return 0;
+}
+
+static void free_mr_page_info(struct rxe_mr *mr)
+{
+ if (!mr->page_info)
+ return;
+
+ kfree(mr->page_info);
+ mr->page_info = NULL;
}

int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
@@ -134,8 +198,6 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,

rxe_mr_init(access, mr);

- xa_init(&mr->page_list);
-
umem = ib_umem_get(&rxe->ib_dev, start, length, access);
if (IS_ERR(umem)) {
rxe_dbg_mr(mr, "Unable to pin memory region err = %d\n",
@@ -143,46 +205,24 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
return PTR_ERR(umem);
}

+ err = alloc_mr_page_info(mr, ib_umem_num_pages(umem));
+ if (err)
+ goto err2;
+
err = rxe_mr_fill_pages_from_sgt(mr, &umem->sgt_append.sgt);
- if (err) {
- ib_umem_release(umem);
- return err;
- }
+ if (err)
+ goto err1;

mr->umem = umem;
mr->ibmr.type = IB_MR_TYPE_USER;
mr->state = RXE_MR_STATE_VALID;

return 0;
-}
-
-static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
-{
- XA_STATE(xas, &mr->page_list, 0);
- int i = 0;
- int err;
-
- xa_init(&mr->page_list);
-
- do {
- xas_lock(&xas);
- while (i != num_buf) {
- xas_store(&xas, XA_ZERO_ENTRY);
- if (xas_error(&xas))
- break;
- xas_next(&xas);
- i++;
- }
- xas_unlock(&xas);
- } while (xas_nomem(&xas, GFP_KERNEL));
-
- err = xas_error(&xas);
- if (err)
- return err;
-
- mr->num_buf = num_buf;
-
- return 0;
+err1:
+ free_mr_page_info(mr);
+err2:
+ ib_umem_release(umem);
+ return err;
}

int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr)
@@ -192,7 +232,7 @@ int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr)
/* always allow remote access for FMRs */
rxe_mr_init(RXE_ACCESS_REMOTE, mr);

- err = rxe_mr_alloc(mr, max_pages);
+ err = alloc_mr_page_info(mr, max_pages);
if (err)
goto err1;

@@ -205,26 +245,43 @@ int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr)
return err;
}

+/*
+ * I) MRs with page_size >= PAGE_SIZE,
+ * Split a large MR page (mr->page_size) into multiple PAGE_SIZE
+ * sub-pages and store them in page_info, offset is always 0.
+ *
+ * Called when mr->page_size > PAGE_SIZE. Each call to rxe_set_page()
+ * represents one mr->page_size region, which we must split into
+ * (mr->page_size >> PAGE_SHIFT) individual pages.
+ *
+ * II) MRs with page_size < PAGE_SIZE,
+ * Save each PAGE_SIZE page and its offset within the system page in page_info.
+ */
static int rxe_set_page(struct ib_mr *ibmr, u64 dma_addr)
{
struct rxe_mr *mr = to_rmr(ibmr);
- struct page *page = ib_virt_dma_to_page(dma_addr);
bool persistent = !!(mr->access & IB_ACCESS_FLUSH_PERSISTENT);
- int err;
+ u32 i, pages_per_mr = mr_page_size(mr) >> PAGE_SHIFT;

- if (persistent && !is_pmem_page(page)) {
- rxe_dbg_mr(mr, "Page cannot be persistent\n");
- return -EINVAL;
- }
+ pages_per_mr = MAX(1, pages_per_mr);

- if (unlikely(mr->nbuf == mr->num_buf))
- return -ENOMEM;
+ for (i = 0; i < pages_per_mr; i++) {
+ u64 addr = dma_addr + i * PAGE_SIZE;
+ struct page *sub_page = ib_virt_dma_to_page(addr);

- err = xa_err(xa_store(&mr->page_list, mr->nbuf, page, GFP_KERNEL));
- if (err)
- return err;
+ if (unlikely(mr->nbuf >= mr->max_allowed_buf))
+ return -ENOMEM;
+
+ if (persistent && !is_pmem_page(sub_page)) {
+ rxe_dbg_mr(mr, "Page cannot be persistent\n");
+ return -EINVAL;
+ }
+
+ mr->page_info[mr->nbuf].page = sub_page;
+ mr->page_info[mr->nbuf].offset = addr & (PAGE_SIZE - 1);
+ mr->nbuf++;
+ }

- mr->nbuf++;
return 0;
}

@@ -234,6 +291,31 @@ int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sgl,
struct rxe_mr *mr = to_rmr(ibmr);
unsigned int page_size = mr_page_size(mr);

+ /*
+ * Ensure page_size and PAGE_SIZE are compatible for mapping.
+ * We require one to be a multiple of the other for correct
+ * iova-to-page conversion.
+ */
+ if (!IS_ALIGNED(page_size, PAGE_SIZE) &&
+ !IS_ALIGNED(PAGE_SIZE, page_size)) {
+ rxe_dbg_mr(mr, "MR page size %u must be compatible with PAGE_SIZE %lu\n",
+ page_size, PAGE_SIZE);
+ return -EINVAL;
+ }
+
+ if (mr_page_size(mr) > PAGE_SIZE) {
+ /* resize page_info if needed */
+ u32 map_mr_pages = (page_size >> PAGE_SHIFT) * mr->num_buf;
+
+ if (map_mr_pages > mr->max_allowed_buf) {
+ rxe_dbg_mr(mr, "requested pages %u exceed max %u\n",
+ map_mr_pages, mr->max_allowed_buf);
+ free_mr_page_info(mr);
+ if (__alloc_mr_page_info(mr, map_mr_pages))
+ return -ENOMEM;
+ }
+ }
+
mr->nbuf = 0;
mr->page_shift = ilog2(page_size);
mr->page_mask = ~((u64)page_size - 1);
@@ -245,30 +327,30 @@ int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sgl,
static int rxe_mr_copy_xarray(struct rxe_mr *mr, u64 iova, void *addr,
unsigned int length, enum rxe_mr_copy_dir dir)
{
- unsigned int page_offset = rxe_mr_iova_to_page_offset(mr, iova);
- unsigned long index = rxe_mr_iova_to_index(mr, iova);
unsigned int bytes;
- struct page *page;
- void *va;
+ u8 *va;

while (length) {
- page = xa_load(&mr->page_list, index);
- if (!page)
+ unsigned long index = rxe_mr_iova_to_index(mr, iova);
+ struct rxe_mr_page *info = &mr->page_info[index];
+ unsigned int page_offset = rxe_mr_iova_to_page_offset(mr, iova);
+
+ if (!info->page)
return -EFAULT;

- bytes = min_t(unsigned int, length,
- mr_page_size(mr) - page_offset);
- va = kmap_local_page(page);
+ page_offset += info->offset;
+ bytes = min_t(unsigned int, length, PAGE_SIZE - page_offset);
+ va = kmap_local_page(info->page);
+
if (dir == RXE_FROM_MR_OBJ)
memcpy(addr, va + page_offset, bytes);
else
memcpy(va + page_offset, addr, bytes);
kunmap_local(va);

- page_offset = 0;
addr += bytes;
+ iova += bytes;
length -= bytes;
- index++;
}

return 0;
@@ -426,9 +508,6 @@ int copy_data(

static int rxe_mr_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length)
{
- unsigned int page_offset;
- unsigned long index;
- struct page *page;
unsigned int bytes;
int err;
u8 *va;
@@ -438,15 +517,17 @@ static int rxe_mr_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int leng
return err;

while (length > 0) {
- index = rxe_mr_iova_to_index(mr, iova);
- page = xa_load(&mr->page_list, index);
- page_offset = rxe_mr_iova_to_page_offset(mr, iova);
- if (!page)
+ unsigned long index = rxe_mr_iova_to_index(mr, iova);
+ struct rxe_mr_page *info = &mr->page_info[index];
+ unsigned int page_offset = rxe_mr_iova_to_page_offset(mr, iova);
+
+ if (!info->page)
return -EFAULT;
- bytes = min_t(unsigned int, length,
- mr_page_size(mr) - page_offset);

- va = kmap_local_page(page);
+ page_offset += info->offset;
+ bytes = min_t(unsigned int, length, PAGE_SIZE - page_offset);
+
+ va = kmap_local_page(info->page);
arch_wb_cache_pmem(va + page_offset, bytes);
kunmap_local(va);

@@ -502,6 +583,7 @@ enum resp_states rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
} else {
unsigned long index;
int err;
+ struct rxe_mr_page *info;

err = mr_check_range(mr, iova, sizeof(value));
if (err) {
@@ -510,9 +592,12 @@ enum resp_states rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
}
page_offset = rxe_mr_iova_to_page_offset(mr, iova);
index = rxe_mr_iova_to_index(mr, iova);
- page = xa_load(&mr->page_list, index);
- if (!page)
+ info = &mr->page_info[index];
+ if (!info->page)
return RESPST_ERR_RKEY_VIOLATION;
+
+ page_offset += info->offset;
+ page = info->page;
}

if (unlikely(page_offset & 0x7)) {
@@ -551,6 +636,7 @@ enum resp_states rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
} else {
unsigned long index;
int err;
+ struct rxe_mr_page *info;

/* See IBA oA19-28 */
err = mr_check_range(mr, iova, sizeof(value));
@@ -560,9 +646,12 @@ enum resp_states rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
}
page_offset = rxe_mr_iova_to_page_offset(mr, iova);
index = rxe_mr_iova_to_index(mr, iova);
- page = xa_load(&mr->page_list, index);
- if (!page)
+ info = &mr->page_info[index];
+ if (!info->page)
return RESPST_ERR_RKEY_VIOLATION;
+
+ page_offset += info->offset;
+ page = info->page;
}

/* See IBA A19.4.2 */
@@ -726,5 +815,5 @@ void rxe_mr_cleanup(struct rxe_pool_elem *elem)
ib_umem_release(mr->umem);

if (mr->ibmr.type != IB_MR_TYPE_DMA)
- xa_destroy(&mr->page_list);
+ free_mr_page_info(mr);
}
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 373b03f223be..12d03f390b09 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -102,6 +102,8 @@ void rnr_nak_timer(struct timer_list *t)

rxe_dbg_qp(qp, "nak timer fired\n");

+ if (!rxe_get(qp))
+ return;
spin_lock_irqsave(&qp->state_lock, flags);
if (qp->valid) {
/* request a send queue retry */
@@ -110,6 +112,7 @@ void rnr_nak_timer(struct timer_list *t)
rxe_sched_task(&qp->send_task);
}
spin_unlock_irqrestore(&qp->state_lock, flags);
+ rxe_put(qp);
}

static void req_check_sq_drain_done(struct rxe_qp *qp)
diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c
index 2a234f26ac10..c9a7cd38953d 100644
--- a/drivers/infiniband/sw/rxe/rxe_srq.c
+++ b/drivers/infiniband/sw/rxe/rxe_srq.c
@@ -77,9 +77,6 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
goto err_free;
}

- srq->rq.queue = q;
- init->attr.max_wr = srq->rq.max_wr;
-
if (uresp) {
if (copy_to_user(&uresp->srq_num, &srq->srq_num,
sizeof(uresp->srq_num))) {
@@ -88,6 +85,9 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
}
}

+ srq->rq.queue = q;
+ init->attr.max_wr = srq->rq.max_wr;
+
return 0;

err_free:
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index fd48075810dd..1b8ed1031bd5 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -335,6 +335,11 @@ static inline int rkey_is_mw(u32 rkey)
return (index >= RXE_MIN_MW_INDEX) && (index <= RXE_MAX_MW_INDEX);
}

+struct rxe_mr_page {
+ struct page *page;
+ unsigned int offset; /* offset in system page */
+};
+
struct rxe_mr {
struct rxe_pool_elem elem;
struct ib_mr ibmr;
@@ -351,10 +356,13 @@ struct rxe_mr {
unsigned int page_shift;
u64 page_mask;

+ /* size of page_info when mr allocated */
u32 num_buf;
+ /* real size of page_info */
+ u32 max_allowed_buf;
u32 nbuf;

- struct xarray page_list;
+ struct rxe_mr_page *page_info;
};

static inline unsigned int mr_page_size(struct rxe_mr *mr)
diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c
index a10820e33887..e8a88b378d51 100644
--- a/drivers/infiniband/sw/siw/siw_qp_rx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_rx.c
@@ -1435,7 +1435,8 @@ int siw_tcp_rx_data(read_descriptor_t *rd_desc, struct sk_buff *skb,
}
if (unlikely(rv != 0 && rv != -EAGAIN)) {
if ((srx->state > SIW_GET_HDR ||
- qp->rx_fpdu->more_ddp_segs) && run_completion)
+ (qp->rx_fpdu && qp->rx_fpdu->more_ddp_segs)) &&
+ run_completion)
siw_rdmap_complete(qp, rv);

siw_dbg_qp(qp, "rx error %d, rx state %d\n", rv,
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
index 9ecc6343455d..adb798e2a54a 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
@@ -208,7 +208,6 @@ static int rdma_write_sg(struct rtrs_srv_op *id)
size_t sg_cnt;
int err, offset;
bool need_inval;
- u32 rkey = 0;
struct ib_reg_wr rwr;
struct ib_sge *plist;
struct ib_sge list;
@@ -240,11 +239,6 @@ static int rdma_write_sg(struct rtrs_srv_op *id)
wr->wr.num_sge = 1;
wr->remote_addr = le64_to_cpu(id->rd_msg->desc[0].addr);
wr->rkey = le32_to_cpu(id->rd_msg->desc[0].key);
- if (rkey == 0)
- rkey = wr->rkey;
- else
- /* Only one key is actually used */
- WARN_ON_ONCE(rkey != wr->rkey);

wr->wr.opcode = IB_WR_RDMA_WRITE;
wr->wr.wr_cqe = &io_comp_cqe;
@@ -277,7 +271,7 @@ static int rdma_write_sg(struct rtrs_srv_op *id)
inv_wr.opcode = IB_WR_SEND_WITH_INV;
inv_wr.wr_cqe = &io_comp_cqe;
inv_wr.send_flags = 0;
- inv_wr.ex.invalidate_rkey = rkey;
+ inv_wr.ex.invalidate_rkey = wr->rkey;
}

imm_wr.wr.next = NULL;
@@ -601,7 +595,7 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path)
srv_path->mrs_num++) {
struct rtrs_srv_mr *srv_mr = &srv_path->mrs[srv_path->mrs_num];
struct scatterlist *s;
- int nr, nr_sgt, chunks;
+ int nr, nr_sgt, chunks, ind;

sgt = &srv_mr->sgt;
chunks = chunks_per_mr * srv_path->mrs_num;
@@ -631,7 +625,7 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path)
}
nr = ib_map_mr_sg(mr, sgt->sgl, nr_sgt,
NULL, max_chunk_size);
- if (nr != nr_sgt) {
+ if (nr < nr_sgt) {
err = nr < 0 ? nr : -EINVAL;
goto dereg_mr;
}
@@ -647,9 +641,24 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path)
goto dereg_mr;
}
}
- /* Eventually dma addr for each chunk can be cached */
- for_each_sg(sgt->sgl, s, nr_sgt, i)
- srv_path->dma_addr[chunks + i] = sg_dma_address(s);
+
+ /*
+ * Cache DMA addresses by traversing sg entries. If
+ * regions were merged, an inner loop is required to
+ * populate the DMA address array by traversing larger
+ * regions.
+ */
+ ind = chunks;
+ for_each_sg(sgt->sgl, s, nr_sgt, i) {
+ unsigned int dma_len = sg_dma_len(s);
+ u64 dma_addr = sg_dma_address(s);
+ u64 dma_addr_end = dma_addr + dma_len;
+
+ do {
+ srv_path->dma_addr[ind++] = dma_addr;
+ dma_addr += max_chunk_size;
+ } while (dma_addr < dma_addr_end);
+ }

ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
srv_mr->mr = mr;
diff --git a/drivers/interconnect/mediatek/icc-emi.c b/drivers/interconnect/mediatek/icc-emi.c
index 7da740b5fa8d..dfa3a9cd9399 100644
--- a/drivers/interconnect/mediatek/icc-emi.c
+++ b/drivers/interconnect/mediatek/icc-emi.c
@@ -12,6 +12,7 @@
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_platform.h>
+#include <linux/overflow.h>
#include <linux/platform_device.h>
#include <linux/soc/mediatek/dvfsrc.h>

@@ -22,7 +23,9 @@ static int mtk_emi_icc_aggregate(struct icc_node *node, u32 tag, u32 avg_bw,
{
struct mtk_icc_node *in = node->data;

- *agg_avg += avg_bw;
+ if (check_add_overflow(*agg_avg, avg_bw, agg_avg))
+ *agg_avg = U32_MAX;
+
*agg_peak = max_t(u32, *agg_peak, peak_bw);

in->sum_avg = *agg_avg;
@@ -40,7 +43,7 @@ static int mtk_emi_icc_set(struct icc_node *src, struct icc_node *dst)
if (unlikely(!src->provider))
return -EINVAL;

- dev = src->provider->dev;
+ dev = src->provider->dev->parent;

switch (node->ep) {
case 0:
@@ -97,7 +100,7 @@ int mtk_emi_icc_probe(struct platform_device *pdev)
if (!data)
return -ENOMEM;

- provider->dev = pdev->dev.parent;
+ provider->dev = dev;
provider->set = mtk_emi_icc_set;
provider->aggregate = mtk_emi_icc_aggregate;
provider->xlate = of_icc_xlate_onecell;
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 9b4b589a54b5..bf77fdf5529f 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -15,7 +15,6 @@ irqreturn_t amd_iommu_int_thread(int irq, void *data);
irqreturn_t amd_iommu_int_thread_evtlog(int irq, void *data);
irqreturn_t amd_iommu_int_thread_pprlog(int irq, void *data);
irqreturn_t amd_iommu_int_thread_galog(int irq, void *data);
-irqreturn_t amd_iommu_int_handler(int irq, void *data);
void amd_iommu_restart_log(struct amd_iommu *iommu, const char *evt_type,
u8 cntrl_intr, u8 cntrl_log,
u32 status_run_mask, u32 status_overflow_mask);
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 034edce816d0..53afb1cb0a6f 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -2355,12 +2355,8 @@ static int iommu_setup_msi(struct amd_iommu *iommu)
if (r)
return r;

- r = request_threaded_irq(iommu->dev->irq,
- amd_iommu_int_handler,
- amd_iommu_int_thread,
- 0, "AMD-Vi",
- iommu);
-
+ r = request_threaded_irq(iommu->dev->irq, NULL, amd_iommu_int_thread,
+ IRQF_ONESHOT, "AMD-Vi", iommu);
if (r) {
pci_disable_msi(iommu->dev);
return r;
@@ -2534,8 +2530,8 @@ static int __iommu_setup_intcapxt(struct amd_iommu *iommu, const char *devname,
return irq;
}

- ret = request_threaded_irq(irq, amd_iommu_int_handler,
- thread_fn, 0, devname, iommu);
+ ret = request_threaded_irq(irq, NULL, thread_fn, IRQF_ONESHOT, devname,
+ iommu);
if (ret) {
irq_domain_free_irqs(irq, 1);
irq_domain_remove(domain);
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 5914bef0c8c1..30dd482fe095 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1146,11 +1146,6 @@ irqreturn_t amd_iommu_int_thread(int irq, void *data)
return IRQ_HANDLED;
}

-irqreturn_t amd_iommu_int_handler(int irq, void *data)
-{
- return IRQ_WAKE_THREAD;
-}
-
/****************************************************************************
*
* IOMMU command queuing functions
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index e236c7ec221f..49e83c8566a3 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1722,10 +1722,12 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8
}

did = context_domain_id(context);
- context_clear_entry(context);
+ context_clear_present(context);
__iommu_flush_cache(iommu, context, sizeof(*context));
spin_unlock(&iommu->lock);
intel_context_flush_no_pasid(info, context, did);
+ context_clear_entry(context);
+ __iommu_flush_cache(iommu, context, sizeof(*context));
}

int __domain_setup_first_level(struct intel_iommu *iommu, struct device *dev,
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index dcc5466d35f9..9198ac7f6bba 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -969,7 +969,26 @@ static inline unsigned long lvl_to_nr_pages(unsigned int lvl)

static inline void context_set_present(struct context_entry *context)
{
- context->lo |= 1;
+ u64 val;
+
+ dma_wmb();
+ val = READ_ONCE(context->lo) | 1;
+ WRITE_ONCE(context->lo, val);
+}
+
+/*
+ * Clear the Present (P) bit (bit 0) of a context table entry. This initiates
+ * the transition of the entry's ownership from hardware to software. The
+ * caller is responsible for fulfilling the invalidation handshake recommended
+ * by the VT-d spec, Section 6.5.3.3 (Guidance to Software for Invalidations).
+ */
+static inline void context_clear_present(struct context_entry *context)
+{
+ u64 val;
+
+ val = READ_ONCE(context->lo) & GENMASK_ULL(63, 1);
+ WRITE_ONCE(context->lo, val);
+ dma_wmb();
}

static inline void context_set_fault_enable(struct context_entry *context)
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 52f678975da7..d13099a6cb9c 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -153,6 +153,9 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
if (!entries)
return NULL;

+ if (!ecap_coherent(info->iommu->ecap))
+ clflush_cache_range(entries, VTD_PAGE_SIZE);
+
/*
* The pasid directory table entry won't be freed after
* allocation. No worry about the race with free and
@@ -165,10 +168,8 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
iommu_free_pages(entries);
goto retry;
}
- if (!ecap_coherent(info->iommu->ecap)) {
- clflush_cache_range(entries, VTD_PAGE_SIZE);
+ if (!ecap_coherent(info->iommu->ecap))
clflush_cache_range(&dir[dir_index].val, sizeof(*dir));
- }
}

return &entries[index];
@@ -272,7 +273,7 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,

did = pasid_get_domain_id(pte);
pgtt = pasid_pte_get_pgtt(pte);
- intel_pasid_clear_entry(dev, pasid, fault_ignore);
+ pasid_clear_present(pte);
spin_unlock(&iommu->lock);

if (!ecap_coherent(iommu->ecap))
@@ -286,6 +287,10 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);

devtlb_invalidation_with_pasid(iommu, dev, pasid);
+ intel_pasid_clear_entry(dev, pasid, fault_ignore);
+ if (!ecap_coherent(iommu->ecap))
+ clflush_cache_range(pte, sizeof(*pte));
+
if (!fault_ignore)
intel_iommu_drain_pasid_prq(dev, pasid);
}
@@ -1023,7 +1028,7 @@ static int device_pasid_table_setup(struct device *dev, u8 bus, u8 devfn)
}

if (context_copied(iommu, bus, devfn)) {
- context_clear_entry(context);
+ context_clear_present(context);
__iommu_flush_cache(iommu, context, sizeof(*context));

/*
@@ -1043,6 +1048,9 @@ static int device_pasid_table_setup(struct device *dev, u8 bus, u8 devfn)
iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID);

+ context_clear_entry(context);
+ __iommu_flush_cache(iommu, context, sizeof(*context));
+
/*
* At this point, the device is supposed to finish reset at
* its driver probe stage, so no in-flight DMA will exist,
diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
index a771a77d4239..637373995be8 100644
--- a/drivers/iommu/intel/pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -233,9 +233,23 @@ static inline void pasid_set_wpe(struct pasid_entry *pe)
*/
static inline void pasid_set_present(struct pasid_entry *pe)
{
+ dma_wmb();
pasid_set_bits(&pe->val[0], 1 << 0, 1);
}

+/*
+ * Clear the Present (P) bit (bit 0) of a scalable-mode PASID table entry.
+ * This initiates the transition of the entry's ownership from hardware
+ * to software. The caller is responsible for fulfilling the invalidation
+ * handshake recommended by the VT-d spec, Section 6.5.3.3 (Guidance to
+ * Software for Invalidations).
+ */
+static inline void pasid_clear_present(struct pasid_entry *pe)
+{
+ pasid_set_bits(&pe->val[0], 1 << 0, 0);
+ dma_wmb();
+}
+
/*
* Setup Page Walk Snoop bit (Bit 87) of a scalable mode PASID
* entry.
diff --git a/drivers/leds/leds-expresswire.c b/drivers/leds/leds-expresswire.c
index bb69be228a6d..25c6b159a6ee 100644
--- a/drivers/leds/leds-expresswire.c
+++ b/drivers/leds/leds-expresswire.c
@@ -9,6 +9,7 @@
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/gpio/consumer.h>
+#include <linux/irqflags.h>
#include <linux/types.h>

#include <linux/leds-expresswire.h>
@@ -16,37 +17,41 @@
void expresswire_power_off(struct expresswire_common_props *props)
{
gpiod_set_value_cansleep(props->ctrl_gpio, 0);
- usleep_range(props->timing.poweroff_us, props->timing.poweroff_us * 2);
+ fsleep(props->timing.poweroff_us);
}
EXPORT_SYMBOL_NS_GPL(expresswire_power_off, "EXPRESSWIRE");

void expresswire_enable(struct expresswire_common_props *props)
{
+ unsigned long flags;
+
+ local_irq_save(flags);
+
gpiod_set_value(props->ctrl_gpio, 1);
udelay(props->timing.detect_delay_us);
gpiod_set_value(props->ctrl_gpio, 0);
udelay(props->timing.detect_us);
gpiod_set_value(props->ctrl_gpio, 1);
+
+ local_irq_restore(flags);
}
EXPORT_SYMBOL_NS_GPL(expresswire_enable, "EXPRESSWIRE");

-void expresswire_start(struct expresswire_common_props *props)
+static void expresswire_start(struct expresswire_common_props *props)
{
gpiod_set_value(props->ctrl_gpio, 1);
udelay(props->timing.data_start_us);
}
-EXPORT_SYMBOL_NS_GPL(expresswire_start, "EXPRESSWIRE");

-void expresswire_end(struct expresswire_common_props *props)
+static void expresswire_end(struct expresswire_common_props *props)
{
gpiod_set_value(props->ctrl_gpio, 0);
udelay(props->timing.end_of_data_low_us);
gpiod_set_value(props->ctrl_gpio, 1);
udelay(props->timing.end_of_data_high_us);
}
-EXPORT_SYMBOL_NS_GPL(expresswire_end, "EXPRESSWIRE");

-void expresswire_set_bit(struct expresswire_common_props *props, bool bit)
+static void expresswire_set_bit(struct expresswire_common_props *props, bool bit)
{
if (bit) {
gpiod_set_value(props->ctrl_gpio, 0);
@@ -60,13 +65,18 @@ void expresswire_set_bit(struct expresswire_common_props *props, bool bit)
udelay(props->timing.short_bitset_us);
}
}
-EXPORT_SYMBOL_NS_GPL(expresswire_set_bit, "EXPRESSWIRE");

void expresswire_write_u8(struct expresswire_common_props *props, u8 val)
{
+ unsigned long flags;
+
+ local_irq_save(flags);
+
expresswire_start(props);
for (int i = 7; i >= 0; i--)
expresswire_set_bit(props, val & BIT(i));
expresswire_end(props);
+
+ local_irq_restore(flags);
}
EXPORT_SYMBOL_NS_GPL(expresswire_write_u8, "EXPRESSWIRE");
diff --git a/drivers/leds/rgb/leds-qcom-lpg.c b/drivers/leds/rgb/leds-qcom-lpg.c
index e197f548cddb..a460782dadca 100644
--- a/drivers/leds/rgb/leds-qcom-lpg.c
+++ b/drivers/leds/rgb/leds-qcom-lpg.c
@@ -369,7 +369,7 @@ static int lpg_lut_store(struct lpg *lpg, struct led_pattern *pattern,
{
unsigned int idx;
u16 val;
- int i;
+ int i, ret;

idx = bitmap_find_next_zero_area(lpg->lut_bitmap, lpg->lut_size,
0, len, 0);
@@ -379,8 +379,10 @@ static int lpg_lut_store(struct lpg *lpg, struct led_pattern *pattern,
for (i = 0; i < len; i++) {
val = pattern[i].brightness;

- regmap_bulk_write(lpg->map, lpg->lut_base + LPG_LUT_REG(idx + i),
- &val, sizeof(val));
+ ret = regmap_bulk_write(lpg->map, lpg->lut_base + LPG_LUT_REG(idx + i),
+ &val, sizeof(val));
+ if (ret)
+ return ret;
}

bitmap_set(lpg->lut_bitmap, idx, len);
diff --git a/drivers/mailbox/pcc.c b/drivers/mailbox/pcc.c
index ff292b9e0be9..0e0a66359d4c 100644
--- a/drivers/mailbox/pcc.c
+++ b/drivers/mailbox/pcc.c
@@ -305,22 +305,6 @@ static void pcc_chan_acknowledge(struct pcc_chan_info *pchan)
pcc_chan_reg_read_modify_write(&pchan->db);
}

-static void *write_response(struct pcc_chan_info *pchan)
-{
- struct pcc_header pcc_header;
- void *buffer;
- int data_len;
-
- memcpy_fromio(&pcc_header, pchan->chan.shmem,
- sizeof(pcc_header));
- data_len = pcc_header.length - sizeof(u32) + sizeof(struct pcc_header);
-
- buffer = pchan->chan.rx_alloc(pchan->chan.mchan->cl, data_len);
- if (buffer != NULL)
- memcpy_fromio(buffer, pchan->chan.shmem, data_len);
- return buffer;
-}
-
/**
* pcc_mbox_irq - PCC mailbox interrupt handler
* @irq: interrupt number
@@ -332,8 +316,6 @@ static irqreturn_t pcc_mbox_irq(int irq, void *p)
{
struct pcc_chan_info *pchan;
struct mbox_chan *chan = p;
- struct pcc_header *pcc_header = chan->active_req;
- void *handle = NULL;

pchan = chan->con_priv;

@@ -357,17 +339,7 @@ static irqreturn_t pcc_mbox_irq(int irq, void *p)
* required to avoid any possible race in updatation of this flag.
*/
pchan->chan_in_use = false;
-
- if (pchan->chan.rx_alloc)
- handle = write_response(pchan);
-
- if (chan->active_req) {
- pcc_header = chan->active_req;
- if (pcc_header->flags & PCC_CMD_COMPLETION_NOTIFY)
- mbox_chan_txdone(chan, 0);
- }
-
- mbox_chan_received_data(chan, handle);
+ mbox_chan_received_data(chan, NULL);

pcc_chan_acknowledge(pchan);

@@ -411,24 +383,9 @@ pcc_mbox_request_channel(struct mbox_client *cl, int subspace_id)
pcc_mchan = &pchan->chan;
pcc_mchan->shmem = acpi_os_ioremap(pcc_mchan->shmem_base_addr,
pcc_mchan->shmem_size);
- if (!pcc_mchan->shmem)
- goto err;
-
- pcc_mchan->manage_writes = false;
-
- /* This indicates that the channel is ready to accept messages.
- * This needs to happen after the channel has registered
- * its callback. There is no access point to do that in
- * the mailbox API. That implies that the mailbox client must
- * have set the allocate callback function prior to
- * sending any messages.
- */
- if (pchan->type == ACPI_PCCT_TYPE_EXT_PCC_SLAVE_SUBSPACE)
- pcc_chan_reg_read_modify_write(&pchan->cmd_update);
-
- return pcc_mchan;
+ if (pcc_mchan->shmem)
+ return pcc_mchan;

-err:
mbox_free_channel(chan);
return ERR_PTR(-ENXIO);
}
@@ -459,38 +416,8 @@ void pcc_mbox_free_channel(struct pcc_mbox_chan *pchan)
}
EXPORT_SYMBOL_GPL(pcc_mbox_free_channel);

-static int pcc_write_to_buffer(struct mbox_chan *chan, void *data)
-{
- struct pcc_chan_info *pchan = chan->con_priv;
- struct pcc_mbox_chan *pcc_mbox_chan = &pchan->chan;
- struct pcc_header *pcc_header = data;
-
- if (!pchan->chan.manage_writes)
- return 0;
-
- /* The PCC header length includes the command field
- * but not the other values from the header.
- */
- int len = pcc_header->length - sizeof(u32) + sizeof(struct pcc_header);
- u64 val;
-
- pcc_chan_reg_read(&pchan->cmd_complete, &val);
- if (!val) {
- pr_info("%s pchan->cmd_complete not set", __func__);
- return -1;
- }
- memcpy_toio(pcc_mbox_chan->shmem, data, len);
- return 0;
-}
-
-
/**
- * pcc_send_data - Called from Mailbox Controller code. If
- * pchan->chan.rx_alloc is set, then the command complete
- * flag is checked and the data is written to the shared
- * buffer io memory.
- *
- * If pchan->chan.rx_alloc is not set, then it is used
+ * pcc_send_data - Called from Mailbox Controller code. Used
* here only to ring the channel doorbell. The PCC client
* specific read/write is done in the client driver in
* order to maintain atomicity over PCC channel once
@@ -506,37 +433,17 @@ static int pcc_send_data(struct mbox_chan *chan, void *data)
int ret;
struct pcc_chan_info *pchan = chan->con_priv;

- ret = pcc_write_to_buffer(chan, data);
- if (ret)
- return ret;
-
ret = pcc_chan_reg_read_modify_write(&pchan->cmd_update);
if (ret)
return ret;

ret = pcc_chan_reg_read_modify_write(&pchan->db);
-
if (!ret && pchan->plat_irq > 0)
pchan->chan_in_use = true;

return ret;
}

-
-static bool pcc_last_tx_done(struct mbox_chan *chan)
-{
- struct pcc_chan_info *pchan = chan->con_priv;
- u64 val;
-
- pcc_chan_reg_read(&pchan->cmd_complete, &val);
- if (!val)
- return false;
- else
- return true;
-}
-
-
-
/**
* pcc_startup - Called from Mailbox Controller code. Used here
* to request the interrupt.
@@ -582,7 +489,6 @@ static const struct mbox_chan_ops pcc_chan_ops = {
.send_data = pcc_send_data,
.startup = pcc_startup,
.shutdown = pcc_shutdown,
- .last_tx_done = pcc_last_tx_done,
};

/**
diff --git a/drivers/mcb/mcb-core.c b/drivers/mcb/mcb-core.c
index c1367223e71a..3d487d75c483 100644
--- a/drivers/mcb/mcb-core.c
+++ b/drivers/mcb/mcb-core.c
@@ -85,7 +85,8 @@ static void mcb_remove(struct device *dev)
struct mcb_device *mdev = to_mcb_device(dev);
struct module *carrier_mod;

- mdrv->remove(mdev);
+ if (mdrv->remove)
+ mdrv->remove(mdev);

carrier_mod = mdev->dev.parent->driver->owner;
module_put(carrier_mod);
@@ -176,13 +177,13 @@ static const struct device_type mcb_carrier_device_type = {
* @owner: The @mcb_driver's module
* @mod_name: The name of the @mcb_driver's module
*
- * Register a @mcb_driver at the system. Perform some sanity checks, if
- * the .probe and .remove methods are provided by the driver.
+ * Register a @mcb_driver at the system. Perform a sanity check, if
+ * .probe method is provided by the driver.
*/
int __mcb_register_driver(struct mcb_driver *drv, struct module *owner,
const char *mod_name)
{
- if (!drv->probe || !drv->remove)
+ if (!drv->probe)
return -EINVAL;

drv->driver.owner = owner;
diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
index 78e17dd4d01b..f4950b5f766d 100644
--- a/drivers/md/dm-zone.c
+++ b/drivers/md/dm-zone.c
@@ -56,7 +56,7 @@ int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
{
struct mapped_device *md = disk->private_data;
struct dm_table *map;
- struct dm_table *zone_revalidate_map = md->zone_revalidate_map;
+ struct dm_table *zone_revalidate_map = READ_ONCE(md->zone_revalidate_map);
int srcu_idx, ret = -EIO;
bool put_table = false;

@@ -66,11 +66,13 @@ int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
* Zone revalidation during __bind() is in progress, but this
* call is from a different process
*/
- if (dm_suspended_md(md))
- return -EAGAIN;
-
map = dm_get_live_table(md, &srcu_idx);
put_table = true;
+
+ if (dm_suspended_md(md)) {
+ ret = -EAGAIN;
+ goto do_put_table;
+ }
} else {
/* Zone revalidation during __bind() */
map = zone_revalidate_map;
@@ -80,6 +82,7 @@ int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
ret = dm_blk_do_report_zones(md, map, sector, nr_zones, cb,
data);

+do_put_table:
if (put_table)
dm_put_live_table(md, srcu_idx);

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 6c83ab940af7..52f01c44e73a 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1363,6 +1363,8 @@ void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone)
if (!tgt_clone)
tgt_clone = clone;

+ bio_clone_blkg_association(tgt_clone, io->orig_bio);
+
/*
* Account io->origin_bio to DM dev on behalf of target
* that took ownership of IO with DM_MAPIO_SUBMITTED.
diff --git a/drivers/md/md-llbitmap.c b/drivers/md/md-llbitmap.c
index 1eb434306162..bcb6eae1c711 100644
--- a/drivers/md/md-llbitmap.c
+++ b/drivers/md/md-llbitmap.c
@@ -712,8 +712,10 @@ static int llbitmap_suspend_timeout(struct llbitmap *llbitmap, int page_idx)
percpu_ref_kill(&pctl->active);

if (!wait_event_timeout(pctl->wait, percpu_ref_is_zero(&pctl->active),
- llbitmap->mddev->bitmap_info.daemon_sleep * HZ))
+ llbitmap->mddev->bitmap_info.daemon_sleep * HZ)) {
+ percpu_ref_resurrect(&pctl->active);
return -ETIMEDOUT;
+ }

return 0;
}
diff --git a/drivers/md/md.h b/drivers/md/md.h
index fd6e001c1d38..9d66afb8cc6e 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -736,8 +736,8 @@ static inline int mddev_trylock(struct mddev *mddev)
int ret;

ret = mutex_trylock(&mddev->reconfig_mutex);
- if (!ret && test_bit(MD_DELETED, &mddev->flags)) {
- ret = -ENODEV;
+ if (ret && test_bit(MD_DELETED, &mddev->flags)) {
+ ret = 0;
mutex_unlock(&mddev->reconfig_mutex);
}
return ret;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 592a40233004..ce7fd6886956 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -3253,6 +3253,7 @@ static int raid1_run(struct mddev *mddev)
if (!mddev_is_dm(mddev)) {
ret = raid1_set_limits(mddev);
if (ret) {
+ md_unregister_thread(mddev, &conf->thread);
if (!mddev->private)
raid1_free(mddev, conf);
return ret;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 14dcd5142eb4..d58ae150b450 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -3402,7 +3402,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
!test_bit(In_sync, &rdev->flags))
continue;
/* This is where we read from */
- any_working = 1;
sector = r10_bio->devs[j].addr;

if (is_badblock(rdev, sector, max_sync,
@@ -3417,6 +3416,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
continue;
}
}
+ any_working = 1;
bio = r10_bio->devs[0].bio;
bio->bi_next = biolist;
biolist = bio;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 41de29206402..3b711a1198ad 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3751,9 +3751,14 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s,
struct r5dev *dev = &sh->dev[disk_idx];
struct r5dev *fdev[2] = { &sh->dev[s->failed_num[0]],
&sh->dev[s->failed_num[1]] };
+ struct mddev *mddev = sh->raid_conf->mddev;
+ bool force_rcw = false;
int i;
- bool force_rcw = (sh->raid_conf->rmw_level == PARITY_DISABLE_RMW);

+ if (sh->raid_conf->rmw_level == PARITY_DISABLE_RMW ||
+ (mddev->bitmap_ops && mddev->bitmap_ops->blocks_synced &&
+ !mddev->bitmap_ops->blocks_synced(mddev, sh->sector)))
+ force_rcw = true;

if (test_bit(R5_LOCKED, &dev->flags) ||
test_bit(R5_UPTODATE, &dev->flags))
@@ -8056,7 +8061,8 @@ static int raid5_run(struct mddev *mddev)
goto abort;
}

- if (log_init(conf, journal_dev, raid5_has_ppl(conf)))
+ ret = log_init(conf, journal_dev, raid5_has_ppl(conf));
+ if (ret)
goto abort;

return 0;
diff --git a/drivers/media/i2c/ccs/ccs-core.c b/drivers/media/i2c/ccs/ccs-core.c
index 1c889c878abd..08e78f0bf252 100644
--- a/drivers/media/i2c/ccs/ccs-core.c
+++ b/drivers/media/i2c/ccs/ccs-core.c
@@ -3425,7 +3425,21 @@ static int ccs_probe(struct i2c_client *client)
sensor->scale_m = CCS_LIM(sensor, SCALER_N_MIN);

/* prepare PLL configuration input values */
- sensor->pll.bus_type = CCS_PLL_BUS_TYPE_CSI2_DPHY;
+ switch (sensor->hwcfg.csi_signalling_mode) {
+ case CCS_CSI_SIGNALING_MODE_CSI_2_CPHY:
+ sensor->pll.bus_type = CCS_PLL_BUS_TYPE_CSI2_CPHY;
+ break;
+ case CCS_CSI_SIGNALING_MODE_CSI_2_DPHY:
+ case SMIAPP_CSI_SIGNALLING_MODE_CCP2_DATA_CLOCK:
+ case SMIAPP_CSI_SIGNALLING_MODE_CCP2_DATA_STROBE:
+ sensor->pll.bus_type = CCS_PLL_BUS_TYPE_CSI2_DPHY;
+ break;
+ default:
+ dev_err(&client->dev, "unsupported signalling mode %u\n",
+ sensor->hwcfg.csi_signalling_mode);
+ rval = -EINVAL;
+ goto out_cleanup;
+ }
sensor->pll.csi2.lanes = sensor->hwcfg.lanes;
if (CCS_LIM(sensor, CLOCK_CALCULATION) &
CCS_CLOCK_CALCULATION_LANE_SPEED) {
diff --git a/drivers/media/pci/mgb4/mgb4_trigger.c b/drivers/media/pci/mgb4/mgb4_trigger.c
index d7dddc5c8728..10c23f0c833d 100644
--- a/drivers/media/pci/mgb4/mgb4_trigger.c
+++ b/drivers/media/pci/mgb4/mgb4_trigger.c
@@ -114,7 +114,7 @@ static int probe_trigger(struct iio_dev *indio_dev, int irq)
if (!st->trig)
return -ENOMEM;

- ret = request_irq(irq, &iio_trigger_generic_data_rdy_poll, 0,
+ ret = request_irq(irq, &iio_trigger_generic_data_rdy_poll, IRQF_NO_THREAD,
"mgb4-trigger", st->trig);
if (ret)
goto error_free_trig;
diff --git a/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c b/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c
index e3038c18ca36..a4387ed58cac 100644
--- a/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c
+++ b/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c
@@ -1753,8 +1753,10 @@ static int wave5_vpu_open_dec(struct file *filp)
spin_lock_init(&inst->state_spinlock);

inst->codec_info = kzalloc(sizeof(*inst->codec_info), GFP_KERNEL);
- if (!inst->codec_info)
+ if (!inst->codec_info) {
+ kfree(inst);
return -ENOMEM;
+ }

v4l2_fh_init(&inst->v4l2_fh, vdev);
v4l2_fh_add(&inst->v4l2_fh, filp);
diff --git a/drivers/media/platform/chips-media/wave5/wave5-vpu-enc.c b/drivers/media/platform/chips-media/wave5/wave5-vpu-enc.c
index 9bfaa9fb3ceb..94fb5d7c8702 100644
--- a/drivers/media/platform/chips-media/wave5/wave5-vpu-enc.c
+++ b/drivers/media/platform/chips-media/wave5/wave5-vpu-enc.c
@@ -1578,8 +1578,10 @@ static int wave5_vpu_open_enc(struct file *filp)
inst->ops = &wave5_vpu_enc_inst_ops;

inst->codec_info = kzalloc(sizeof(*inst->codec_info), GFP_KERNEL);
- if (!inst->codec_info)
+ if (!inst->codec_info) {
+ kfree(inst);
return -ENOMEM;
+ }

v4l2_fh_init(&inst->v4l2_fh, vdev);
v4l2_fh_add(&inst->v4l2_fh, filp);
diff --git a/drivers/media/usb/uvc/uvc_video.c b/drivers/media/usb/uvc/uvc_video.c
index 2094e059d7d3..ec76595f3c4b 100644
--- a/drivers/media/usb/uvc/uvc_video.c
+++ b/drivers/media/usb/uvc/uvc_video.c
@@ -1812,7 +1812,7 @@ static int uvc_alloc_urb_buffers(struct uvc_streaming *stream,
npackets = UVC_MAX_PACKETS;

/* Retry allocations until one succeed. */
- for (; npackets > 1; npackets /= 2) {
+ for (; npackets > 0; npackets /= 2) {
stream->urb_size = psize * npackets;

for (i = 0; i < UVC_URBS; ++i) {
@@ -1837,6 +1837,7 @@ static int uvc_alloc_urb_buffers(struct uvc_streaming *stream,
uvc_dbg(stream->dev, VIDEO,
"Failed to allocate URB buffers (%u bytes per packet)\n",
psize);
+ stream->urb_size = 0;
return 0;
}

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 6cec1858947b..55a9fea95195 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -406,6 +406,17 @@ config MFD_CS47L92
help
Support for Cirrus Logic CS42L92, CS47L92 and CS47L93 Smart Codecs

+config MFD_TN48M_CPLD
+ tristate "Delta Networks TN48M switch CPLD driver"
+ depends on I2C
+ depends on ARCH_MVEBU || COMPILE_TEST
+ select MFD_SIMPLE_MFD_I2C
+ help
+ Select this option to enable support for Delta Networks TN48M switch
+ CPLD. It consists of reset and GPIO drivers. CPLD provides GPIOS-s
+ for the SFP slots as well as power supply related information.
+ SFP support depends on the GPIO driver being selected.
+
config PMIC_DA903X
bool "Dialog Semiconductor DA9030/DA9034 PMIC Support"
depends on I2C=y
diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c
index 85ff8717d850..91975536d14d 100644
--- a/drivers/mfd/arizona-core.c
+++ b/drivers/mfd/arizona-core.c
@@ -1100,7 +1100,7 @@ int arizona_dev_init(struct arizona *arizona)
} else if (val & 0x01) {
ret = wm5102_clear_write_sequencer(arizona);
if (ret)
- return ret;
+ goto err_reset;
}
break;
default:
diff --git a/drivers/mfd/simple-mfd-i2c.c b/drivers/mfd/simple-mfd-i2c.c
index 0a607a1e3ca1..9f911afafc25 100644
--- a/drivers/mfd/simple-mfd-i2c.c
+++ b/drivers/mfd/simple-mfd-i2c.c
@@ -110,6 +110,7 @@ static const struct simple_mfd_data spacemit_p1 = {
};

static const struct of_device_id simple_mfd_i2c_of_match[] = {
+ { .compatible = "delta,tn48m-cpld" },
{ .compatible = "fsl,ls1028aqds-fpga" },
{ .compatible = "fsl,lx2160aqds-fpga" },
{ .compatible = "fsl,lx2160ardb-fpga" },
diff --git a/drivers/mtd/devices/mtd_intel_dg.c b/drivers/mtd/devices/mtd_intel_dg.c
index b438ee5aacc3..114e69135b8d 100644
--- a/drivers/mtd/devices/mtd_intel_dg.c
+++ b/drivers/mtd/devices/mtd_intel_dg.c
@@ -738,6 +738,7 @@ static int intel_dg_mtd_probe(struct auxiliary_device *aux_dev,

kref_init(&nvm->refcnt);
mutex_init(&nvm->lock);
+ nvm->nregions = nregions;

for (n = 0, i = 0; i < INTEL_DG_NVM_REGIONS; i++) {
if (!invm->regions[i].name)
@@ -745,13 +746,15 @@ static int intel_dg_mtd_probe(struct auxiliary_device *aux_dev,

char *name = kasprintf(GFP_KERNEL, "%s.%s",
dev_name(&aux_dev->dev), invm->regions[i].name);
- if (!name)
- continue;
+ if (!name) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
nvm->regions[n].name = name;
nvm->regions[n].id = i;
n++;
}
- nvm->nregions = n; /* in case where kasprintf fail */

nvm->base = devm_ioremap_resource(device, &invm->bar);
if (IS_ERR(nvm->base)) {
diff --git a/drivers/mtd/nand/raw/cadence-nand-controller.c b/drivers/mtd/nand/raw/cadence-nand-controller.c
index 32ed38b89394..21eabedc2ed1 100644
--- a/drivers/mtd/nand/raw/cadence-nand-controller.c
+++ b/drivers/mtd/nand/raw/cadence-nand-controller.c
@@ -1015,7 +1015,7 @@ static int cadence_nand_cdma_send(struct cdns_nand_ctrl *cdns_ctrl,
}

/* Send SDMA command and wait for finish. */
-static u32
+static int
cadence_nand_cdma_send_and_wait(struct cdns_nand_ctrl *cdns_ctrl,
u8 thread)
{
diff --git a/drivers/mtd/parsers/ofpart_core.c b/drivers/mtd/parsers/ofpart_core.c
index abfa68798918..09961c6f3949 100644
--- a/drivers/mtd/parsers/ofpart_core.c
+++ b/drivers/mtd/parsers/ofpart_core.c
@@ -77,6 +77,7 @@ static int parse_fixed_partitions(struct mtd_info *master,
of_id = of_match_node(parse_ofpart_match_table, ofpart_node);
if (dedicated && !of_id) {
/* The 'partitions' subnode might be used by another parser */
+ of_node_put(ofpart_node);
return 0;
}

@@ -91,12 +92,18 @@ static int parse_fixed_partitions(struct mtd_info *master,
nr_parts++;
}

- if (nr_parts == 0)
+ if (nr_parts == 0) {
+ if (dedicated)
+ of_node_put(ofpart_node);
return 0;
+ }

parts = kcalloc(nr_parts, sizeof(*parts), GFP_KERNEL);
- if (!parts)
+ if (!parts) {
+ if (dedicated)
+ of_node_put(ofpart_node);
return -ENOMEM;
+ }

i = 0;
for_each_child_of_node(ofpart_node, pp) {
@@ -175,6 +182,9 @@ static int parse_fixed_partitions(struct mtd_info *master,
if (quirks && quirks->post_parse)
quirks->post_parse(master, parts, nr_parts);

+ if (dedicated)
+ of_node_put(ofpart_node);
+
*pparts = parts;
return nr_parts;

@@ -183,6 +193,8 @@ static int parse_fixed_partitions(struct mtd_info *master,
master->name, pp, mtd_node);
ret = -EINVAL;
ofpart_none:
+ if (dedicated)
+ of_node_put(ofpart_node);
of_node_put(pp);
kfree(parts);
return ret;
diff --git a/drivers/mtd/parsers/tplink_safeloader.c b/drivers/mtd/parsers/tplink_safeloader.c
index e358a029dc70..4fcaf92d22e4 100644
--- a/drivers/mtd/parsers/tplink_safeloader.c
+++ b/drivers/mtd/parsers/tplink_safeloader.c
@@ -116,6 +116,7 @@ static int mtd_parser_tplink_safeloader_parse(struct mtd_info *mtd,
return idx;

err_free:
+ kfree(buf);
for (idx -= 1; idx >= 0; idx--)
kfree(parts[idx].name);
err_free_parts:
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 51733fb29bd7..dba8f6869094 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -791,26 +791,29 @@ static int bond_update_speed_duplex(struct slave *slave)
struct ethtool_link_ksettings ecmd;
int res;

- slave->speed = SPEED_UNKNOWN;
- slave->duplex = DUPLEX_UNKNOWN;
-
res = __ethtool_get_link_ksettings(slave_dev, &ecmd);
if (res < 0)
- return 1;
+ goto speed_duplex_unknown;
if (ecmd.base.speed == 0 || ecmd.base.speed == ((__u32)-1))
- return 1;
+ goto speed_duplex_unknown;
switch (ecmd.base.duplex) {
case DUPLEX_FULL:
case DUPLEX_HALF:
break;
default:
- return 1;
+ goto speed_duplex_unknown;
}

slave->speed = ecmd.base.speed;
slave->duplex = ecmd.base.duplex;

return 0;
+
+speed_duplex_unknown:
+ slave->speed = SPEED_UNKNOWN;
+ slave->duplex = DUPLEX_UNKNOWN;
+
+ return 1;
}

const char *bond_slave_link_status(s8 link)
@@ -4402,9 +4405,13 @@ static int bond_close(struct net_device *bond_dev)

bond_work_cancel_all(bond);
bond->send_peer_notif = 0;
+ WRITE_ONCE(bond->recv_probe, NULL);
+
+ /* Wait for any in-flight RX handlers */
+ synchronize_net();
+
if (bond_is_lb(bond))
bond_alb_deinitialize(bond);
- bond->recv_probe = NULL;

if (BOND_MODE(bond) == BOND_MODE_8023AD &&
bond->params.broadcast_neighbor)
diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c
index c398ac42eae9..b90890030751 100644
--- a/drivers/net/caif/caif_serial.c
+++ b/drivers/net/caif/caif_serial.c
@@ -284,6 +284,7 @@ static void ser_release(struct work_struct *work)
{
struct list_head list;
struct ser_device *ser, *tmp;
+ struct tty_struct *tty;

spin_lock(&ser_lock);
list_replace_init(&ser_release_list, &list);
@@ -292,9 +293,11 @@ static void ser_release(struct work_struct *work)
if (!list_empty(&list)) {
rtnl_lock();
list_for_each_entry_safe(ser, tmp, &list, node) {
+ tty = ser->tty;
dev_close(ser->dev);
unregister_netdevice(ser->dev);
debugfs_deinit(ser);
+ tty_kref_put(tty);
}
rtnl_unlock();
}
@@ -355,8 +358,6 @@ static void ldisc_close(struct tty_struct *tty)
{
struct ser_device *ser = tty->disc_data;

- tty_kref_put(ser->tty);
-
spin_lock(&ser_lock);
list_move(&ser->node, &ser_release_list);
spin_unlock(&ser_lock);
diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.c b/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.c
index 198f49b40dbf..2994f10446a6 100644
--- a/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.c
+++ b/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.c
@@ -442,7 +442,7 @@ __bnge_hwrm_reserve_pf_rings(struct bnge_dev *bd, struct bnge_hw_rings *hwr)
struct hwrm_func_cfg_input *req;
u32 enables = 0;

- if (bnge_hwrm_req_init(bd, req, HWRM_FUNC_QCFG))
+ if (bnge_hwrm_req_init(bd, req, HWRM_FUNC_CFG))
return NULL;

req->fid = cpu_to_le16(0xffff);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index e976a88b952f..c8eba180250e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -1048,13 +1048,13 @@ static void hns3_init_tx_spare_buffer(struct hns3_enet_ring *ring)
int order;

if (!alloc_size)
- return;
+ goto not_init;

order = get_order(alloc_size);
if (order > MAX_PAGE_ORDER) {
if (net_ratelimit())
dev_warn(ring_to_dev(ring), "failed to allocate tx spare buffer, exceed to max order\n");
- return;
+ goto not_init;
}

tx_spare = devm_kzalloc(ring_to_dev(ring), sizeof(*tx_spare),
@@ -1092,6 +1092,13 @@ static void hns3_init_tx_spare_buffer(struct hns3_enet_ring *ring)
devm_kfree(ring_to_dev(ring), tx_spare);
devm_kzalloc_error:
ring->tqp->handle->kinfo.tx_spare_buf_size = 0;
+not_init:
+ /* When driver init or reset_init, the ring->tx_spare is always NULL;
+ * but when called from hns3_set_ringparam, it's usually not NULL, and
+ * will be restored if hns3_init_all_ring() failed. So it's safe to set
+ * ring->tx_spare to NULL here.
+ */
+ ring->tx_spare = NULL;
}

/* Use hns3_tx_spare_space() to make sure there is enough buffer
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c b/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c
index b5805969404f..01e82d0b6b2c 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c
@@ -307,7 +307,7 @@ static void octep_setup_iq_regs_cn93_pf(struct octep_device *oct, int iq_no)
}

/* Setup registers for a hardware Rx Queue */
-static void octep_setup_oq_regs_cn93_pf(struct octep_device *oct, int oq_no)
+static int octep_setup_oq_regs_cn93_pf(struct octep_device *oct, int oq_no)
{
u64 reg_val;
u64 oq_ctl = 0ULL;
@@ -355,6 +355,7 @@ static void octep_setup_oq_regs_cn93_pf(struct octep_device *oct, int oq_no)
reg_val = ((u64)time_threshold << 32) |
CFG_GET_OQ_INTR_PKT(oct->conf);
octep_write_csr64(oct, CN93_SDP_R_OUT_INT_LEVELS(oq_no), reg_val);
+ return 0;
}

/* Setup registers for a PF mailbox */
@@ -696,14 +697,26 @@ static void octep_enable_interrupts_cn93_pf(struct octep_device *oct)
/* Disable all interrupts */
static void octep_disable_interrupts_cn93_pf(struct octep_device *oct)
{
- u64 intr_mask = 0ULL;
+ u64 reg_val, intr_mask = 0ULL;
int srn, num_rings, i;

srn = CFG_GET_PORTS_PF_SRN(oct->conf);
num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf);

- for (i = 0; i < num_rings; i++)
- intr_mask |= (0x1ULL << (srn + i));
+ for (i = 0; i < num_rings; i++) {
+ intr_mask |= BIT_ULL(srn + i);
+ reg_val = octep_read_csr64(oct,
+ CN93_SDP_R_IN_INT_LEVELS(srn + i));
+ reg_val &= ~CN93_INT_ENA_BIT;
+ octep_write_csr64(oct,
+ CN93_SDP_R_IN_INT_LEVELS(srn + i), reg_val);
+
+ reg_val = octep_read_csr64(oct,
+ CN93_SDP_R_OUT_INT_LEVELS(srn + i));
+ reg_val &= ~CN93_INT_ENA_BIT;
+ octep_write_csr64(oct,
+ CN93_SDP_R_OUT_INT_LEVELS(srn + i), reg_val);
+ }

octep_write_csr64(oct, CN93_SDP_EPF_IRERR_RINT_ENA_W1C, intr_mask);
octep_write_csr64(oct, CN93_SDP_EPF_ORERR_RINT_ENA_W1C, intr_mask);
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_cnxk_pf.c b/drivers/net/ethernet/marvell/octeon_ep/octep_cnxk_pf.c
index 5de0b5ecbc5f..09a3f1d0645b 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_cnxk_pf.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_cnxk_pf.c
@@ -8,6 +8,7 @@
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
+#include <linux/jiffies.h>

#include "octep_config.h"
#include "octep_main.h"
@@ -327,12 +328,14 @@ static void octep_setup_iq_regs_cnxk_pf(struct octep_device *oct, int iq_no)
}

/* Setup registers for a hardware Rx Queue */
-static void octep_setup_oq_regs_cnxk_pf(struct octep_device *oct, int oq_no)
+static int octep_setup_oq_regs_cnxk_pf(struct octep_device *oct, int oq_no)
{
- u64 reg_val;
- u64 oq_ctl = 0ULL;
- u32 time_threshold = 0;
struct octep_oq *oq = oct->oq[oq_no];
+ unsigned long t_out_jiffies;
+ u32 time_threshold = 0;
+ u64 oq_ctl = 0ULL;
+ u64 reg_ba_val;
+ u64 reg_val;

oq_no += CFG_GET_PORTS_PF_SRN(oct->conf);
reg_val = octep_read_csr64(oct, CNXK_SDP_R_OUT_CONTROL(oq_no));
@@ -343,6 +346,36 @@ static void octep_setup_oq_regs_cnxk_pf(struct octep_device *oct, int oq_no)
reg_val = octep_read_csr64(oct, CNXK_SDP_R_OUT_CONTROL(oq_no));
} while (!(reg_val & CNXK_R_OUT_CTL_IDLE));
}
+ octep_write_csr64(oct, CNXK_SDP_R_OUT_WMARK(oq_no), oq->max_count);
+ /* Wait for WMARK to get applied */
+ usleep_range(10, 15);
+
+ octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_BADDR(oq_no),
+ oq->desc_ring_dma);
+ octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_RSIZE(oq_no),
+ oq->max_count);
+ reg_ba_val = octep_read_csr64(oct, CNXK_SDP_R_OUT_SLIST_BADDR(oq_no));
+
+ if (reg_ba_val != oq->desc_ring_dma) {
+ t_out_jiffies = jiffies + 10 * HZ;
+ do {
+ if (reg_ba_val == ULLONG_MAX)
+ return -EFAULT;
+ octep_write_csr64(oct,
+ CNXK_SDP_R_OUT_SLIST_BADDR(oq_no),
+ oq->desc_ring_dma);
+ octep_write_csr64(oct,
+ CNXK_SDP_R_OUT_SLIST_RSIZE(oq_no),
+ oq->max_count);
+ reg_ba_val =
+ octep_read_csr64(oct,
+ CNXK_SDP_R_OUT_SLIST_BADDR(oq_no));
+ } while ((reg_ba_val != oq->desc_ring_dma) &&
+ time_before(jiffies, t_out_jiffies));
+
+ if (reg_ba_val != oq->desc_ring_dma)
+ return -EAGAIN;
+ }

reg_val &= ~(CNXK_R_OUT_CTL_IMODE);
reg_val &= ~(CNXK_R_OUT_CTL_ROR_P);
@@ -356,10 +389,6 @@ static void octep_setup_oq_regs_cnxk_pf(struct octep_device *oct, int oq_no)
reg_val |= (CNXK_R_OUT_CTL_ES_P);

octep_write_csr64(oct, CNXK_SDP_R_OUT_CONTROL(oq_no), reg_val);
- octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_BADDR(oq_no),
- oq->desc_ring_dma);
- octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_RSIZE(oq_no),
- oq->max_count);

oq_ctl = octep_read_csr64(oct, CNXK_SDP_R_OUT_CONTROL(oq_no));

@@ -385,6 +414,7 @@ static void octep_setup_oq_regs_cnxk_pf(struct octep_device *oct, int oq_no)
reg_val &= ~0xFFFFFFFFULL;
reg_val |= CFG_GET_OQ_WMARK(oct->conf);
octep_write_csr64(oct, CNXK_SDP_R_OUT_WMARK(oq_no), reg_val);
+ return 0;
}

/* Setup registers for a PF mailbox */
@@ -720,14 +750,26 @@ static void octep_enable_interrupts_cnxk_pf(struct octep_device *oct)
/* Disable all interrupts */
static void octep_disable_interrupts_cnxk_pf(struct octep_device *oct)
{
- u64 intr_mask = 0ULL;
+ u64 reg_val, intr_mask = 0ULL;
int srn, num_rings, i;

srn = CFG_GET_PORTS_PF_SRN(oct->conf);
num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf);

- for (i = 0; i < num_rings; i++)
- intr_mask |= (0x1ULL << (srn + i));
+ for (i = 0; i < num_rings; i++) {
+ intr_mask |= BIT_ULL(srn + i);
+ reg_val = octep_read_csr64(oct,
+ CNXK_SDP_R_IN_INT_LEVELS(srn + i));
+ reg_val &= ~CNXK_INT_ENA_BIT;
+ octep_write_csr64(oct,
+ CNXK_SDP_R_IN_INT_LEVELS(srn + i), reg_val);
+
+ reg_val = octep_read_csr64(oct,
+ CNXK_SDP_R_OUT_INT_LEVELS(srn + i));
+ reg_val &= ~CNXK_INT_ENA_BIT;
+ octep_write_csr64(oct,
+ CNXK_SDP_R_OUT_INT_LEVELS(srn + i), reg_val);
+ }

octep_write_csr64(oct, CNXK_SDP_EPF_IRERR_RINT_ENA_W1C, intr_mask);
octep_write_csr64(oct, CNXK_SDP_EPF_ORERR_RINT_ENA_W1C, intr_mask);
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
index 81ac4267811c..35d0ff289a70 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
@@ -77,7 +77,7 @@ struct octep_pci_win_regs {

struct octep_hw_ops {
void (*setup_iq_regs)(struct octep_device *oct, int q);
- void (*setup_oq_regs)(struct octep_device *oct, int q);
+ int (*setup_oq_regs)(struct octep_device *oct, int q);
void (*setup_mbox_regs)(struct octep_device *oct, int mbox);

irqreturn_t (*mbox_intr_handler)(void *ioq_vector);
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h
index ca473502d7a0..95f1dfff90cc 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h
@@ -386,5 +386,6 @@
#define CN93_PEM_BAR4_INDEX 7
#define CN93_PEM_BAR4_INDEX_SIZE 0x400000ULL
#define CN93_PEM_BAR4_INDEX_OFFSET (CN93_PEM_BAR4_INDEX * CN93_PEM_BAR4_INDEX_SIZE)
+#define CN93_INT_ENA_BIT BIT_ULL(62)

#endif /* _OCTEP_REGS_CN9K_PF_H_ */
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cnxk_pf.h b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cnxk_pf.h
index e637d7c8224d..4d172a552f80 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cnxk_pf.h
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cnxk_pf.h
@@ -412,5 +412,6 @@
#define CNXK_PEM_BAR4_INDEX 7
#define CNXK_PEM_BAR4_INDEX_SIZE 0x400000ULL
#define CNXK_PEM_BAR4_INDEX_OFFSET (CNXK_PEM_BAR4_INDEX * CNXK_PEM_BAR4_INDEX_SIZE)
+#define CNXK_INT_ENA_BIT BIT_ULL(62)

#endif /* _OCTEP_REGS_CNXK_PF_H_ */
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c
index 82b6b19e76b4..f2a7c6a76c74 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c
@@ -12,6 +12,8 @@
#include "octep_config.h"
#include "octep_main.h"

+static void octep_oq_free_ring_buffers(struct octep_oq *oq);
+
static void octep_oq_reset_indices(struct octep_oq *oq)
{
oq->host_read_idx = 0;
@@ -170,11 +172,15 @@ static int octep_setup_oq(struct octep_device *oct, int q_no)
goto oq_fill_buff_err;

octep_oq_reset_indices(oq);
- oct->hw_ops.setup_oq_regs(oct, q_no);
+ if (oct->hw_ops.setup_oq_regs(oct, q_no))
+ goto oq_setup_err;
+
oct->num_oqs++;

return 0;

+oq_setup_err:
+ octep_oq_free_ring_buffers(oq);
oq_fill_buff_err:
vfree(oq->buff_info);
oq->buff_info = NULL;
diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cn9k.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cn9k.c
index 88937fce75f1..4c769b27c278 100644
--- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cn9k.c
+++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cn9k.c
@@ -196,7 +196,7 @@ static void octep_vf_setup_iq_regs_cn93(struct octep_vf_device *oct, int iq_no)
}

/* Setup registers for a hardware Rx Queue */
-static void octep_vf_setup_oq_regs_cn93(struct octep_vf_device *oct, int oq_no)
+static int octep_vf_setup_oq_regs_cn93(struct octep_vf_device *oct, int oq_no)
{
struct octep_vf_oq *oq = oct->oq[oq_no];
u32 time_threshold = 0;
@@ -239,6 +239,7 @@ static void octep_vf_setup_oq_regs_cn93(struct octep_vf_device *oct, int oq_no)
time_threshold = CFG_GET_OQ_INTR_TIME(oct->conf);
reg_val = ((u64)time_threshold << 32) | CFG_GET_OQ_INTR_PKT(oct->conf);
octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_INT_LEVELS(oq_no), reg_val);
+ return 0;
}

/* Setup registers for a VF mailbox */
diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cnxk.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cnxk.c
index 1f79dfad42c6..a968b93a6794 100644
--- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cnxk.c
+++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cnxk.c
@@ -199,11 +199,13 @@ static void octep_vf_setup_iq_regs_cnxk(struct octep_vf_device *oct, int iq_no)
}

/* Setup registers for a hardware Rx Queue */
-static void octep_vf_setup_oq_regs_cnxk(struct octep_vf_device *oct, int oq_no)
+static int octep_vf_setup_oq_regs_cnxk(struct octep_vf_device *oct, int oq_no)
{
struct octep_vf_oq *oq = oct->oq[oq_no];
+ unsigned long t_out_jiffies;
u32 time_threshold = 0;
u64 oq_ctl = ULL(0);
+ u64 reg_ba_val;
u64 reg_val;

reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_CONTROL(oq_no));
@@ -214,6 +216,38 @@ static void octep_vf_setup_oq_regs_cnxk(struct octep_vf_device *oct, int oq_no)
reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_CONTROL(oq_no));
} while (!(reg_val & CNXK_VF_R_OUT_CTL_IDLE));
}
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_WMARK(oq_no),
+ oq->max_count);
+ /* Wait for WMARK to get applied */
+ usleep_range(10, 15);
+
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_SLIST_BADDR(oq_no),
+ oq->desc_ring_dma);
+ octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_SLIST_RSIZE(oq_no),
+ oq->max_count);
+ reg_ba_val = octep_vf_read_csr64(oct,
+ CNXK_VF_SDP_R_OUT_SLIST_BADDR(oq_no));
+ if (reg_ba_val != oq->desc_ring_dma) {
+ t_out_jiffies = jiffies + 10 * HZ;
+ do {
+ if (reg_ba_val == ULLONG_MAX)
+ return -EFAULT;
+ octep_vf_write_csr64(oct,
+ CNXK_VF_SDP_R_OUT_SLIST_BADDR
+ (oq_no), oq->desc_ring_dma);
+ octep_vf_write_csr64(oct,
+ CNXK_VF_SDP_R_OUT_SLIST_RSIZE
+ (oq_no), oq->max_count);
+ reg_ba_val =
+ octep_vf_read_csr64(oct,
+ CNXK_VF_SDP_R_OUT_SLIST_BADDR
+ (oq_no));
+ } while ((reg_ba_val != oq->desc_ring_dma) &&
+ time_before(jiffies, t_out_jiffies));
+
+ if (reg_ba_val != oq->desc_ring_dma)
+ return -EAGAIN;
+ }

reg_val &= ~(CNXK_VF_R_OUT_CTL_IMODE);
reg_val &= ~(CNXK_VF_R_OUT_CTL_ROR_P);
@@ -227,8 +261,6 @@ static void octep_vf_setup_oq_regs_cnxk(struct octep_vf_device *oct, int oq_no)
reg_val |= (CNXK_VF_R_OUT_CTL_ES_P);

octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_CONTROL(oq_no), reg_val);
- octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_SLIST_BADDR(oq_no), oq->desc_ring_dma);
- octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_SLIST_RSIZE(oq_no), oq->max_count);

oq_ctl = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_CONTROL(oq_no));
/* Clear the ISIZE and BSIZE (22-0) */
@@ -250,6 +282,7 @@ static void octep_vf_setup_oq_regs_cnxk(struct octep_vf_device *oct, int oq_no)
reg_val &= ~GENMASK_ULL(31, 0);
reg_val |= CFG_GET_OQ_WMARK(oct->conf);
octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_WMARK(oq_no), reg_val);
+ return 0;
}

/* Setup registers for a VF mailbox */
diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.h b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.h
index b9f13506f462..c74cd2369e90 100644
--- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.h
+++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.h
@@ -55,7 +55,7 @@ struct octep_vf_mmio {

struct octep_vf_hw_ops {
void (*setup_iq_regs)(struct octep_vf_device *oct, int q);
- void (*setup_oq_regs)(struct octep_vf_device *oct, int q);
+ int (*setup_oq_regs)(struct octep_vf_device *oct, int q);
void (*setup_mbox_regs)(struct octep_vf_device *oct, int mbox);

irqreturn_t (*non_ioq_intr_handler)(void *ioq_vector);
diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c
index d70c8be3cfc4..6f865dbbba6c 100644
--- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c
+++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c
@@ -12,6 +12,8 @@
#include "octep_vf_config.h"
#include "octep_vf_main.h"

+static void octep_vf_oq_free_ring_buffers(struct octep_vf_oq *oq);
+
static void octep_vf_oq_reset_indices(struct octep_vf_oq *oq)
{
oq->host_read_idx = 0;
@@ -171,11 +173,15 @@ static int octep_vf_setup_oq(struct octep_vf_device *oct, int q_no)
goto oq_fill_buff_err;

octep_vf_oq_reset_indices(oq);
- oct->hw_ops.setup_oq_regs(oct, q_no);
+ if (oct->hw_ops.setup_oq_regs(oct, q_no))
+ goto oq_setup_err;
+
oct->num_oqs++;

return 0;

+oq_setup_err:
+ octep_vf_oq_free_ring_buffers(oq);
oq_fill_buff_err:
vfree(oq->buff_info);
oq->buff_info = NULL;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index 747fbdf2a908..8530df8b3fda 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -3632,11 +3632,22 @@ static void rvu_remove(struct pci_dev *pdev)
devm_kfree(&pdev->dev, rvu);
}

+static void rvu_shutdown(struct pci_dev *pdev)
+{
+ struct rvu *rvu = pci_get_drvdata(pdev);
+
+ if (!rvu)
+ return;
+
+ rvu_clear_rvum_blk_revid(rvu);
+}
+
static struct pci_driver rvu_driver = {
.name = DRV_NAME,
.id_table = rvu_id_table,
.probe = rvu_probe,
.remove = rvu_remove,
+ .shutdown = rvu_shutdown,
};

static int __init rvu_init_module(void)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index c7c70429eb6c..8658cb2143df 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -1042,32 +1042,35 @@ void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf,
rvu_write64(rvu, blkaddr,
NPC_AF_MCAMEX_BANKX_ACTION(index, bank), *(u64 *)&action);

- /* update the VF flow rule action with the VF default entry action */
- if (mcam_index < 0)
- npc_update_vf_flow_entry(rvu, mcam, blkaddr, pcifunc,
- *(u64 *)&action);
-
/* update the action change in default rule */
pfvf = rvu_get_pfvf(rvu, pcifunc);
if (pfvf->def_ucast_rule)
pfvf->def_ucast_rule->rx_action = action;

- index = npc_get_nixlf_mcam_index(mcam, pcifunc,
- nixlf, NIXLF_PROMISC_ENTRY);
+ if (mcam_index < 0) {
+ /* update the VF flow rule action with the VF default
+ * entry action
+ */
+ npc_update_vf_flow_entry(rvu, mcam, blkaddr, pcifunc,
+ *(u64 *)&action);

- /* If PF's promiscuous entry is enabled,
- * Set RSS action for that entry as well
- */
- npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index, blkaddr,
- alg_idx);
+ index = npc_get_nixlf_mcam_index(mcam, pcifunc,
+ nixlf, NIXLF_PROMISC_ENTRY);

- index = npc_get_nixlf_mcam_index(mcam, pcifunc,
- nixlf, NIXLF_ALLMULTI_ENTRY);
- /* If PF's allmulti entry is enabled,
- * Set RSS action for that entry as well
- */
- npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index, blkaddr,
- alg_idx);
+ /* If PF's promiscuous entry is enabled,
+ * Set RSS action for that entry as well
+ */
+ npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index,
+ blkaddr, alg_idx);
+
+ index = npc_get_nixlf_mcam_index(mcam, pcifunc,
+ nixlf, NIXLF_ALLMULTI_ENTRY);
+ /* If PF's allmulti entry is enabled,
+ * Set RSS action for that entry as well
+ */
+ npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index,
+ blkaddr, alg_idx);
+ }
}

void npc_enadis_default_mce_entry(struct rvu *rvu, u16 pcifunc,
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index a7a7bc0e1b67..bbf25769f499 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -3321,6 +3321,7 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
err_sriov_cleannup:
otx2_sriov_vfcfg_cleanup(pf);
err_pf_sriov_init:
+ otx2_unregister_dl(pf);
otx2_shutdown_tc(pf);
err_mcam_flow_del:
otx2_mcam_flow_del(pf);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index b34b85539f3b..5bced924a24f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -179,7 +179,8 @@ static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size)
}

/* Use this function to get max num channels (rxqs/txqs) only to create netdev */
-static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
+static inline unsigned int
+mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
{
return is_kdump_kernel() ?
MLX5E_MIN_NUM_CHANNELS :
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index c93ee969ea64..ec715b158a34 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -448,22 +448,8 @@ static void mlx5e_ptpsq_unhealthy_work(struct work_struct *work)
{
struct mlx5e_ptpsq *ptpsq =
container_of(work, struct mlx5e_ptpsq, report_unhealthy_work);
- struct mlx5e_txqsq *sq = &ptpsq->txqsq;
-
- /* Recovering the PTP SQ means re-enabling NAPI, which requires the
- * netdev instance lock. However, SQ closing has to wait for this work
- * task to finish while also holding the same lock. So either get the
- * lock or find that the SQ is no longer enabled and thus this work is
- * not relevant anymore.
- */
- while (!netdev_trylock(sq->netdev)) {
- if (!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))
- return;
- msleep(20);
- }

mlx5e_reporter_tx_ptpsq_unhealthy(ptpsq);
- netdev_unlock(sq->netdev);
}

static int mlx5e_ptp_open_txqsq(struct mlx5e_ptp *c, u32 tisn,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index b1415992ffa2..a09a7c05820d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Mellanox Technologies.

+#include <net/netdev_lock.h>
+
#include "health.h"
#include "params.h"
#include "txrx.h"
@@ -177,6 +179,16 @@ static int mlx5e_rx_reporter_timeout_recover(void *ctx)
rq = ctx;
priv = rq->priv;

+ /* Acquire netdev instance lock to synchronize with channel close and
+ * reopen flows. Either successfully obtain the lock, or detect that
+ * channels are closing for another reason, making this work no longer
+ * necessary.
+ */
+ while (!netdev_trylock(rq->netdev)) {
+ if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &rq->priv->state))
+ return 0;
+ msleep(20);
+ }
mutex_lock(&priv->state_lock);

eq = rq->cq.mcq.eq;
@@ -186,6 +198,7 @@ static int mlx5e_rx_reporter_timeout_recover(void *ctx)
clear_bit(MLX5E_SQ_STATE_ENABLED, &rq->icosq->state);

mutex_unlock(&priv->state_lock);
+ netdev_unlock(rq->netdev);

return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index 9e2cf191ed30..9f6454102cf7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -1,6 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2019 Mellanox Technologies. */

+#include <net/netdev_lock.h>
+
#include "health.h"
#include "en/ptp.h"
#include "en/devlink.h"
@@ -78,6 +80,18 @@ static int mlx5e_tx_reporter_err_cqe_recover(void *ctx)
if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
return 0;

+ /* Recovering queues means re-enabling NAPI, which requires the netdev
+ * instance lock. However, SQ closing flows have to wait for work tasks
+ * to finish while also holding the netdev instance lock. So either get
+ * the lock or find that the SQ is no longer enabled and thus this work
+ * is not relevant anymore.
+ */
+ while (!netdev_trylock(dev)) {
+ if (!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))
+ return 0;
+ msleep(20);
+ }
+
err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
if (err) {
netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
@@ -113,9 +127,11 @@ static int mlx5e_tx_reporter_err_cqe_recover(void *ctx)
else
mlx5e_trigger_napi_sched(sq->cq.napi);

+ netdev_unlock(dev);
return 0;
out:
clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
+ netdev_unlock(dev);
return err;
}

@@ -136,10 +152,24 @@ static int mlx5e_tx_reporter_timeout_recover(void *ctx)
sq = to_ctx->sq;
eq = sq->cq.mcq.eq;
priv = sq->priv;
+
+ /* Recovering the TX queues implies re-enabling NAPI, which requires
+ * the netdev instance lock.
+ * However, channel closing flows have to wait for this work to finish
+ * while holding the same lock. So either get the lock or find that
+ * channels are being closed for other reason and this work is not
+ * relevant anymore.
+ */
+ while (!netdev_trylock(sq->netdev)) {
+ if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state))
+ return 0;
+ msleep(20);
+ }
+
err = mlx5e_health_channel_eq_recover(sq->netdev, eq, sq->cq.ch_stats);
if (!err) {
to_ctx->status = 0; /* this sq recovered */
- return err;
+ goto out;
}

mutex_lock(&priv->state_lock);
@@ -147,7 +177,7 @@ static int mlx5e_tx_reporter_timeout_recover(void *ctx)
mutex_unlock(&priv->state_lock);
if (!err) {
to_ctx->status = 1; /* all channels recovered */
- return err;
+ goto out;
}

to_ctx->status = err;
@@ -155,7 +185,8 @@ static int mlx5e_tx_reporter_timeout_recover(void *ctx)
netdev_err(priv->netdev,
"mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n",
err);
-
+out:
+ netdev_unlock(sq->netdev);
return err;
}

@@ -172,10 +203,22 @@ static int mlx5e_tx_reporter_ptpsq_unhealthy_recover(void *ctx)
return 0;

priv = ptpsq->txqsq.priv;
+ netdev = priv->netdev;
+
+ /* Recovering the PTP SQ means re-enabling NAPI, which requires the
+ * netdev instance lock. However, SQ closing has to wait for this work
+ * task to finish while also holding the same lock. So either get the
+ * lock or find that the SQ is no longer enabled and thus this work is
+ * not relevant anymore.
+ */
+ while (!netdev_trylock(netdev)) {
+ if (!test_bit(MLX5E_SQ_STATE_ENABLED, &ptpsq->txqsq.state))
+ return 0;
+ msleep(20);
+ }

mutex_lock(&priv->state_lock);
chs = &priv->channels;
- netdev = priv->netdev;

carrier_ok = netif_carrier_ok(netdev);
netif_carrier_off(netdev);
@@ -192,6 +235,7 @@ static int mlx5e_tx_reporter_ptpsq_unhealthy_recover(void *ctx)
netif_carrier_on(netdev);

mutex_unlock(&priv->state_lock);
+ netdev_unlock(netdev);

return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 59e17b41c3a6..cb993ad2d9ad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -688,19 +688,7 @@ static void mlx5e_rq_timeout_work(struct work_struct *timeout_work)
struct mlx5e_rq,
rx_timeout_work);

- /* Acquire netdev instance lock to synchronize with channel close and
- * reopen flows. Either successfully obtain the lock, or detect that
- * channels are closing for another reason, making this work no longer
- * necessary.
- */
- while (!netdev_trylock(rq->netdev)) {
- if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &rq->priv->state))
- return;
- msleep(20);
- }
-
mlx5e_reporter_rx_timeout(rq);
- netdev_unlock(rq->netdev);
}

static int mlx5e_alloc_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
@@ -1997,20 +1985,7 @@ void mlx5e_tx_err_cqe_work(struct work_struct *recover_work)
struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq,
recover_work);

- /* Recovering queues means re-enabling NAPI, which requires the netdev
- * instance lock. However, SQ closing flows have to wait for work tasks
- * to finish while also holding the netdev instance lock. So either get
- * the lock or find that the SQ is no longer enabled and thus this work
- * is not relevant anymore.
- */
- while (!netdev_trylock(sq->netdev)) {
- if (!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))
- return;
- msleep(20);
- }
-
mlx5e_reporter_tx_err_cqe(sq);
- netdev_unlock(sq->netdev);
}

static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode)
@@ -5102,19 +5077,6 @@ static void mlx5e_tx_timeout_work(struct work_struct *work)
struct net_device *netdev = priv->netdev;
int i;

- /* Recovering the TX queues implies re-enabling NAPI, which requires
- * the netdev instance lock.
- * However, channel closing flows have to wait for this work to finish
- * while holding the same lock. So either get the lock or find that
- * channels are being closed for other reason and this work is not
- * relevant anymore.
- */
- while (!netdev_trylock(netdev)) {
- if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state))
- return;
- msleep(20);
- }
-
for (i = 0; i < netdev->real_num_tx_queues; i++) {
struct netdev_queue *dev_queue =
netdev_get_tx_queue(netdev, i);
@@ -5127,8 +5089,6 @@ static void mlx5e_tx_timeout_work(struct work_struct *work)
/* break if tried to reopened channels */
break;
}
-
- netdev_unlock(netdev);
}

static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wc.c b/drivers/net/ethernet/mellanox/mlx5/core/wc.c
index 05e5fd777d4f..8701b7b6a2d5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wc.c
@@ -2,6 +2,7 @@
// Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.

#include <linux/io.h>
+#include <linux/iopoll.h>
#include <linux/mlx5/transobj.h>
#include "lib/clock.h"
#include "mlx5_core.h"
@@ -14,7 +15,7 @@
#define TEST_WC_NUM_WQES 255
#define TEST_WC_LOG_CQ_SZ (order_base_2(TEST_WC_NUM_WQES))
#define TEST_WC_SQ_LOG_WQ_SZ TEST_WC_LOG_CQ_SZ
-#define TEST_WC_POLLING_MAX_TIME_JIFFIES msecs_to_jiffies(100)
+#define TEST_WC_POLLING_MAX_TIME_USEC (100 * USEC_PER_MSEC)

struct mlx5_wc_cq {
/* data path - accessed per cqe */
@@ -358,7 +359,6 @@ static int mlx5_wc_poll_cq(struct mlx5_wc_sq *sq)
static void mlx5_core_test_wc(struct mlx5_core_dev *mdev)
{
unsigned int offset = 0;
- unsigned long expires;
struct mlx5_wc_sq *sq;
int i, err;

@@ -388,13 +388,9 @@ static void mlx5_core_test_wc(struct mlx5_core_dev *mdev)

mlx5_wc_post_nop(sq, &offset, true);

- expires = jiffies + TEST_WC_POLLING_MAX_TIME_JIFFIES;
- do {
- err = mlx5_wc_poll_cq(sq);
- if (err)
- usleep_range(2, 10);
- } while (mdev->wc_state == MLX5_WC_STATE_UNINITIALIZED &&
- time_is_after_jiffies(expires));
+ poll_timeout_us(mlx5_wc_poll_cq(sq),
+ mdev->wc_state != MLX5_WC_STATE_UNINITIALIZED, 10,
+ TEST_WC_POLLING_MAX_TIME_USEC, false);

mlx5_wc_destroy_sq(sq);

diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
index 95fac020eb93..08aed4103323 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
@@ -1142,6 +1142,9 @@ static int fbnic_set_cls_rule_ins(struct fbnic_net *fbn,
return -EINVAL;
}

+ dest |= FIELD_PREP(FBNIC_RPC_ACT_TBL0_DMA_HINT,
+ FBNIC_RCD_HDR_AL_DMA_HINT_L4);
+
/* Write action table values */
act_tcam->dest = dest;
act_tcam->rss_en_mask = fbnic_flow_hash_2_rss_en_mask(fbn, hash_idx);
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw_log.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw_log.c
index 85a883dba385..d8a9a7d7c237 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_fw_log.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw_log.c
@@ -51,8 +51,6 @@ int fbnic_fw_log_init(struct fbnic_dev *fbd)
log->data_start = data;
log->data_end = data + FBNIC_FW_LOG_SIZE;

- fbnic_fw_log_enable(fbd, true);
-
return 0;
}

@@ -63,7 +61,6 @@ void fbnic_fw_log_free(struct fbnic_dev *fbd)
if (!fbnic_fw_log_ready(fbd))
return;

- fbnic_fw_log_disable(fbd);
INIT_LIST_HEAD(&log->entries);
log->size = 0;
vfree(log->data_start);
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
index e95be0e7bd9e..cbedaa037cfa 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
@@ -262,6 +262,23 @@ static int fbnic_set_mac(struct net_device *netdev, void *p)
return 0;
}

+static int fbnic_change_mtu(struct net_device *dev, int new_mtu)
+{
+ struct fbnic_net *fbn = netdev_priv(dev);
+
+ if (fbnic_check_split_frames(fbn->xdp_prog, new_mtu, fbn->hds_thresh)) {
+ dev_err(&dev->dev,
+ "MTU %d is larger than HDS threshold %d in XDP mode\n",
+ new_mtu, fbn->hds_thresh);
+
+ return -EINVAL;
+ }
+
+ WRITE_ONCE(dev->mtu, new_mtu);
+
+ return 0;
+}
+
void fbnic_clear_rx_mode(struct fbnic_dev *fbd)
{
struct net_device *netdev = fbd->netdev;
@@ -533,6 +550,7 @@ static const struct net_device_ops fbnic_netdev_ops = {
.ndo_start_xmit = fbnic_xmit_frame,
.ndo_features_check = fbnic_features_check,
.ndo_set_mac_address = fbnic_set_mac,
+ .ndo_change_mtu = fbnic_change_mtu,
.ndo_set_rx_mode = fbnic_set_rx_mode,
.ndo_get_stats64 = fbnic_get_stats64,
.ndo_bpf = fbnic_bpf,
@@ -790,6 +808,8 @@ struct net_device *fbnic_netdev_alloc(struct fbnic_dev *fbd)
netdev->hw_enc_features |= netdev->features;
netdev->features |= NETIF_F_NTUPLE;

+ netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_RX_SG;
+
netdev->min_mtu = IPV6_MIN_MTU;
netdev->max_mtu = FBNIC_MAX_JUMBO_FRAME_SIZE - ETH_HLEN;

diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
index b0a87c57910f..e6ca23a9957d 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
@@ -73,6 +73,8 @@ struct fbnic_net {

/* Time stamping filter config */
struct kernel_hwtstamp_config hwtstamp_config;
+
+ bool tx_pause;
};

int __fbnic_open(struct fbnic_net *fbn);
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c
index 0fa90baad5f8..698b8a85afb3 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c
@@ -303,11 +303,17 @@ static int fbnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto free_irqs;
}

+ err = fbnic_fw_log_init(fbd);
+ if (err)
+ dev_warn(fbd->dev,
+ "Unable to initialize firmware log buffer: %d\n",
+ err);
+
err = fbnic_fw_request_mbx(fbd);
if (err) {
dev_err(&pdev->dev,
"Firmware mailbox initialization failure\n");
- goto free_irqs;
+ goto free_fw_log;
}

/* Send the request to enable the FW logging to host. Note if this
@@ -315,11 +321,7 @@ static int fbnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
* possible the FW is just too old to support the logging and needs
* to be updated.
*/
- err = fbnic_fw_log_init(fbd);
- if (err)
- dev_warn(fbd->dev,
- "Unable to initialize firmware log buffer: %d\n",
- err);
+ fbnic_fw_log_enable(fbd, true);

fbnic_devlink_register(fbd);
fbnic_devlink_otp_check(fbd, "error detected during probe");
@@ -363,6 +365,8 @@ static int fbnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
* firmware updates for fixes.
*/
return 0;
+free_fw_log:
+ fbnic_fw_log_free(fbd);
free_irqs:
fbnic_free_irqs(fbd);
err_destroy_health:
@@ -397,8 +401,9 @@ static void fbnic_remove(struct pci_dev *pdev)
fbnic_hwmon_unregister(fbd);
fbnic_dbg_fbd_exit(fbd);
fbnic_devlink_unregister(fbd);
- fbnic_fw_log_free(fbd);
+ fbnic_fw_log_disable(fbd);
fbnic_fw_free_mbx(fbd);
+ fbnic_fw_log_free(fbd);
fbnic_free_irqs(fbd);

fbnic_devlink_health_destroy(fbd);
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c b/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c
index 7ce3fdd25282..62701923cfe9 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_phylink.c
@@ -208,6 +208,9 @@ fbnic_phylink_mac_link_up(struct phylink_config *config,
struct fbnic_net *fbn = netdev_priv(netdev);
struct fbnic_dev *fbd = fbn->fbd;

+ fbn->tx_pause = tx_pause;
+ fbnic_config_drop_mode(fbn, tx_pause);
+
fbd->mac->link_up(fbd, tx_pause, rx_pause);
}

diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c
index 7f31e890031c..42a186db43ea 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c
@@ -338,9 +338,8 @@ void fbnic_rss_reinit(struct fbnic_dev *fbd, struct fbnic_net *fbn)
else if (tstamp_mask & (1u << flow_type))
dest |= FBNIC_RPC_ACT_TBL0_TS_ENA;

- if (act1_value[flow_type] & FBNIC_RPC_TCAM_ACT1_L4_VALID)
- dest |= FIELD_PREP(FBNIC_RPC_ACT_TBL0_DMA_HINT,
- FBNIC_RCD_HDR_AL_DMA_HINT_L4);
+ dest |= FIELD_PREP(FBNIC_RPC_ACT_TBL0_DMA_HINT,
+ FBNIC_RCD_HDR_AL_DMA_HINT_L4);

rss_en_mask = fbnic_flow_hash_2_rss_en_mask(fbn, flow_type);

diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
index b1e8ce89870f..fbdf79b6ad2d 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
@@ -2573,20 +2573,44 @@ static void fbnic_enable_bdq(struct fbnic_ring *hpq, struct fbnic_ring *ppq)
}

static void fbnic_config_drop_mode_rcq(struct fbnic_napi_vector *nv,
- struct fbnic_ring *rcq)
+ struct fbnic_ring *rcq, bool tx_pause,
+ bool hdr_split)
{
+ struct fbnic_net *fbn = netdev_priv(nv->napi.dev);
u32 drop_mode, rcq_ctl;

- drop_mode = FBNIC_QUEUE_RDE_CTL0_DROP_IMMEDIATE;
+ if (!tx_pause && fbn->num_rx_queues > 1)
+ drop_mode = FBNIC_QUEUE_RDE_CTL0_DROP_IMMEDIATE;
+ else
+ drop_mode = FBNIC_QUEUE_RDE_CTL0_DROP_NEVER;

/* Specify packet layout */
rcq_ctl = FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_DROP_MODE_MASK, drop_mode) |
FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_MIN_HROOM_MASK, FBNIC_RX_HROOM) |
- FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_MIN_TROOM_MASK, FBNIC_RX_TROOM);
+ FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_MIN_TROOM_MASK, FBNIC_RX_TROOM) |
+ FIELD_PREP(FBNIC_QUEUE_RDE_CTL0_EN_HDR_SPLIT, hdr_split);

fbnic_ring_wr32(rcq, FBNIC_QUEUE_RDE_CTL0, rcq_ctl);
}

+void fbnic_config_drop_mode(struct fbnic_net *fbn, bool txp)
+{
+ bool hds;
+ int i, t;
+
+ hds = fbn->hds_thresh < FBNIC_HDR_BYTES_MIN;
+
+ for (i = 0; i < fbn->num_napi; i++) {
+ struct fbnic_napi_vector *nv = fbn->napi[i];
+
+ for (t = 0; t < nv->rxt_count; t++) {
+ struct fbnic_q_triad *qt = &nv->qt[nv->txt_count + t];
+
+ fbnic_config_drop_mode_rcq(nv, &qt->cmpl, txp, hds);
+ }
+ }
+}
+
static void fbnic_config_rim_threshold(struct fbnic_ring *rcq, u16 nv_idx, u32 rx_desc)
{
u32 threshold;
@@ -2633,20 +2657,18 @@ static void fbnic_enable_rcq(struct fbnic_napi_vector *nv,
{
struct fbnic_net *fbn = netdev_priv(nv->napi.dev);
u32 log_size = fls(rcq->size_mask);
- u32 hds_thresh = fbn->hds_thresh;
u32 rcq_ctl = 0;
-
- fbnic_config_drop_mode_rcq(nv, rcq);
+ bool hdr_split;
+ u32 hds_thresh;

/* Force lower bound on MAX_HEADER_BYTES. Below this, all frames should
* be split at L4. It would also result in the frames being split at
* L2/L3 depending on the frame size.
*/
- if (fbn->hds_thresh < FBNIC_HDR_BYTES_MIN) {
- rcq_ctl = FBNIC_QUEUE_RDE_CTL0_EN_HDR_SPLIT;
- hds_thresh = FBNIC_HDR_BYTES_MIN;
- }
+ hdr_split = fbn->hds_thresh < FBNIC_HDR_BYTES_MIN;
+ fbnic_config_drop_mode_rcq(nv, rcq, fbn->tx_pause, hdr_split);

+ hds_thresh = max(fbn->hds_thresh, FBNIC_HDR_BYTES_MIN);
rcq_ctl |= FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PADLEN_MASK, FBNIC_RX_PAD) |
FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_MAX_HDR_MASK, hds_thresh) |
FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PAYLD_OFF_MASK,
@@ -2699,7 +2721,6 @@ static void __fbnic_nv_enable(struct fbnic_napi_vector *nv)
&nv->napi);

fbnic_enable_bdq(&qt->sub0, &qt->sub1);
- fbnic_config_drop_mode_rcq(nv, &qt->cmpl);
fbnic_enable_rcq(nv, &qt->cmpl);
}
}
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
index ca37da5a0b17..51a98f27d5d9 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
@@ -66,7 +66,7 @@ struct fbnic_net;
(4096 - FBNIC_RX_HROOM - FBNIC_RX_TROOM - FBNIC_RX_PAD)
#define FBNIC_HDS_THRESH_DEFAULT \
(1536 - FBNIC_RX_PAD)
-#define FBNIC_HDR_BYTES_MIN 128
+#define FBNIC_HDR_BYTES_MIN 256

struct fbnic_pkt_buff {
struct xdp_buff buff;
@@ -184,6 +184,7 @@ void fbnic_reset_netif_queues(struct fbnic_net *fbn);
irqreturn_t fbnic_msix_clean_rings(int irq, void *data);
void fbnic_napi_enable(struct fbnic_net *fbn);
void fbnic_napi_disable(struct fbnic_net *fbn);
+void fbnic_config_drop_mode(struct fbnic_net *fbn, bool tx_pause);
void fbnic_enable(struct fbnic_net *fbn);
void fbnic_disable(struct fbnic_net *fbn);
void fbnic_flush(struct fbnic_net *fbn);
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c b/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c
index 2f168700f63c..8b2e07821a95 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c
@@ -576,7 +576,7 @@ static int sparx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
static struct ptp_clock_info sparx5_ptp_clock_info = {
.owner = THIS_MODULE,
.name = "sparx5 ptp",
- .max_adj = 200000,
+ .max_adj = 10000000,
.gettime64 = sparx5_ptp_gettime64,
.settime64 = sparx5_ptp_settime64,
.adjtime = sparx5_ptp_adjtime,
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_qos.h b/drivers/net/ethernet/microchip/sparx5/sparx5_qos.h
index 1231a80335d7..04f76f1e23f6 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_qos.h
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_qos.h
@@ -35,7 +35,7 @@
#define SPX5_SE_BURST_UNIT 4096

/* Dwrr */
-#define SPX5_DWRR_COST_MAX 63
+#define SPX5_DWRR_COST_MAX 31

struct sparx5_shaper {
u32 mode;
diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c
index 469784d3a1a6..1b8269320464 100644
--- a/drivers/net/ethernet/mscc/ocelot_net.c
+++ b/drivers/net/ethernet/mscc/ocelot_net.c
@@ -551,44 +551,81 @@ static int ocelot_port_stop(struct net_device *dev)
return 0;
}

-static netdev_tx_t ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev)
+static bool ocelot_xmit_timestamp(struct ocelot *ocelot, int port,
+ struct sk_buff *skb, u32 *rew_op)
{
- struct ocelot_port_private *priv = netdev_priv(dev);
- struct ocelot_port *ocelot_port = &priv->port;
- struct ocelot *ocelot = ocelot_port->ocelot;
- int port = priv->port.index;
- u32 rew_op = 0;
-
- if (!static_branch_unlikely(&ocelot_fdma_enabled) &&
- !ocelot_can_inject(ocelot, 0))
- return NETDEV_TX_BUSY;
-
- /* Check if timestamping is needed */
if (ocelot->ptp && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
struct sk_buff *clone = NULL;

if (ocelot_port_txtstamp_request(ocelot, port, skb, &clone)) {
kfree_skb(skb);
- return NETDEV_TX_OK;
+ return false;
}

if (clone)
OCELOT_SKB_CB(skb)->clone = clone;

- rew_op = ocelot_ptp_rew_op(skb);
+ *rew_op = ocelot_ptp_rew_op(skb);
}

- if (static_branch_unlikely(&ocelot_fdma_enabled)) {
- ocelot_fdma_inject_frame(ocelot, port, rew_op, skb, dev);
- } else {
- ocelot_port_inject_frame(ocelot, port, 0, rew_op, skb);
+ return true;
+}
+
+static netdev_tx_t ocelot_port_xmit_fdma(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct ocelot_port_private *priv = netdev_priv(dev);
+ struct ocelot_port *ocelot_port = &priv->port;
+ struct ocelot *ocelot = ocelot_port->ocelot;
+ int port = priv->port.index;
+ u32 rew_op = 0;
+
+ if (!ocelot_xmit_timestamp(ocelot, port, skb, &rew_op))
+ return NETDEV_TX_OK;
+
+ ocelot_fdma_inject_frame(ocelot, port, rew_op, skb, dev);
+
+ return NETDEV_TX_OK;
+}

- consume_skb(skb);
+static netdev_tx_t ocelot_port_xmit_inj(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct ocelot_port_private *priv = netdev_priv(dev);
+ struct ocelot_port *ocelot_port = &priv->port;
+ struct ocelot *ocelot = ocelot_port->ocelot;
+ int port = priv->port.index;
+ u32 rew_op = 0;
+
+ ocelot_lock_inj_grp(ocelot, 0);
+
+ if (!ocelot_can_inject(ocelot, 0)) {
+ ocelot_unlock_inj_grp(ocelot, 0);
+ return NETDEV_TX_BUSY;
}

+ if (!ocelot_xmit_timestamp(ocelot, port, skb, &rew_op)) {
+ ocelot_unlock_inj_grp(ocelot, 0);
+ return NETDEV_TX_OK;
+ }
+
+ ocelot_port_inject_frame(ocelot, port, 0, rew_op, skb);
+
+ ocelot_unlock_inj_grp(ocelot, 0);
+
+ consume_skb(skb);
+
return NETDEV_TX_OK;
}

+static netdev_tx_t ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ if (static_branch_unlikely(&ocelot_fdma_enabled))
+ return ocelot_port_xmit_fdma(skb, dev);
+
+ return ocelot_port_xmit_inj(skb, dev);
+}
+
enum ocelot_action_type {
OCELOT_MACT_LEARN,
OCELOT_MACT_FORGET,
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
index 2d9efadb5d2a..347b0aff100b 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
@@ -263,9 +263,10 @@ static int ionic_get_link_ksettings(struct net_device *netdev,
/* This means there's no module plugged in */
break;
default:
- dev_info(lif->ionic->dev, "unknown xcvr type pid=%d / 0x%x\n",
- idev->port_info->status.xcvr.pid,
- idev->port_info->status.xcvr.pid);
+ dev_dbg_ratelimited(lif->ionic->dev,
+ "unknown xcvr type pid=%d / 0x%x\n",
+ idev->port_info->status.xcvr.pid,
+ idev->port_info->status.xcvr.pid);
break;
}

diff --git a/drivers/net/ethernet/renesas/rswitch_l2.c b/drivers/net/ethernet/renesas/rswitch_l2.c
index 4a69ec77d69c..9433cd8adced 100644
--- a/drivers/net/ethernet/renesas/rswitch_l2.c
+++ b/drivers/net/ethernet/renesas/rswitch_l2.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Renesas Ethernet Switch device driver
*
- * Copyright (C) 2025 Renesas Electronics Corporation
+ * Copyright (C) 2025 - 2026 Renesas Electronics Corporation
*/

#include <linux/err.h>
@@ -60,6 +60,7 @@ static void rswitch_update_l2_hw_learning(struct rswitch_private *priv)
static void rswitch_update_l2_hw_forwarding(struct rswitch_private *priv)
{
struct rswitch_device *rdev;
+ bool new_forwarding_offload;
unsigned int fwd_mask;

/* calculate fwd_mask with zeroes in bits corresponding to ports that
@@ -73,8 +74,9 @@ static void rswitch_update_l2_hw_forwarding(struct rswitch_private *priv)
}

rswitch_for_all_ports(priv, rdev) {
- if ((rdev_for_l2_offload(rdev) && rdev->forwarding_requested) ||
- rdev->forwarding_offloaded) {
+ new_forwarding_offload = (rdev_for_l2_offload(rdev) && rdev->forwarding_requested);
+
+ if (new_forwarding_offload || rdev->forwarding_offloaded) {
/* Update allowed offload destinations even for ports
* with L2 offload enabled earlier.
*
@@ -84,13 +86,10 @@ static void rswitch_update_l2_hw_forwarding(struct rswitch_private *priv)
priv->addr + FWPC2(rdev->port));
}

- if (rdev_for_l2_offload(rdev) &&
- rdev->forwarding_requested &&
- !rdev->forwarding_offloaded) {
+ if (new_forwarding_offload && !rdev->forwarding_offloaded)
rswitch_change_l2_hw_offloading(rdev, true, false);
- } else if (rdev->forwarding_offloaded) {
+ else if (!new_forwarding_offload && rdev->forwarding_offloaded)
rswitch_change_l2_hw_offloading(rdev, false, false);
- }
}
}

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 8f34c9ad457f..23ec3a59ca8f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -43,6 +43,11 @@
#define DWXGMAC_ID 0x76
#define DWXLGMAC_ID 0x27

+static inline bool dwmac_is_xmac(enum dwmac_core_type core_type)
+{
+ return core_type == DWMAC_CORE_GMAC4 || core_type == DWMAC_CORE_XGMAC;
+}
+
#define STMMAC_CHAN0 0 /* Always supported and default for all chips */

/* TX and RX Descriptor Length, these need to be power of two.
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
index e8539cad4602..1d30f2fb984f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
@@ -109,7 +109,7 @@ static int dwc_eth_dwmac_config_dt(struct platform_device *pdev,
}

/* dwc-qos needs GMAC4, AAL, TSO and PMT */
- plat_dat->has_gmac4 = 1;
+ plat_dat->core_type = DWMAC_CORE_GMAC4;
plat_dat->dma_cfg->aal = 1;
plat_dat->flags |= STMMAC_FLAG_TSO_EN;
plat_dat->pmt = 1;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index e74d00984b88..b2194e414ec1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -565,7 +565,7 @@ static void common_default_data(struct plat_stmmacenet_data *plat)
{
/* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */
plat->clk_csr = STMMAC_CSR_20_35M;
- plat->has_gmac = 1;
+ plat->core_type = DWMAC_CORE_GMAC;
plat->force_sf_dma_mode = 1;

plat->mdio_bus_data->needs_reset = true;
@@ -612,8 +612,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
plat->pdev = pdev;
plat->phy_addr = -1;
plat->clk_csr = STMMAC_CSR_250_300M;
- plat->has_gmac = 0;
- plat->has_gmac4 = 1;
+ plat->core_type = DWMAC_CORE_GMAC4;
plat->force_sf_dma_mode = 0;
plat->flags |= (STMMAC_FLAG_TSO_EN | STMMAC_FLAG_SPH_DISABLE);

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
index ca4035cbb55b..c05f85534f0c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
@@ -473,7 +473,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
return err;
}

- plat_dat->has_gmac = true;
+ plat_dat->core_type = DWMAC_CORE_GMAC;
plat_dat->bsp_priv = gmac;
plat_dat->set_clk_tx_rate = ipq806x_gmac_set_clk_tx_rate;
plat_dat->multicast_filter_bins = 0;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
index 592aa9d636e5..47bc3aeee857 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
@@ -90,9 +90,9 @@ static void loongson_default_data(struct pci_dev *pdev,
/* Get bus_id, this can be overwritten later */
plat->bus_id = pci_dev_id(pdev);

- /* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */
- plat->clk_csr = STMMAC_CSR_20_35M;
- plat->has_gmac = 1;
+ /* clk_csr_i = 100-150MHz & MDC = clk_csr_i/62 */
+ plat->clk_csr = STMMAC_CSR_100_150M;
+ plat->core_type = DWMAC_CORE_GMAC;
plat->force_sf_dma_mode = 1;

/* Set default value for multicast hash bins */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c
index 2562a6d036a2..6fffc9dfbae5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c
@@ -41,7 +41,7 @@ static int lpc18xx_dwmac_probe(struct platform_device *pdev)
if (IS_ERR(plat_dat))
return PTR_ERR(plat_dat);

- plat_dat->has_gmac = true;
+ plat_dat->core_type = DWMAC_CORE_GMAC;

reg = syscon_regmap_lookup_by_compatible("nxp,lpc1850-creg");
if (IS_ERR(reg)) {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
index d8fd4d8f6ced..74c208dd8651 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
@@ -96,7 +96,6 @@ struct ethqos_emac_driver_data {
bool rgmii_config_loopback_en;
bool has_emac_ge_3;
const char *link_clk_name;
- bool has_integrated_pcs;
u32 dma_addr_width;
struct dwmac4_addrs dwmac4_addrs;
bool needs_sgmii_loopback;
@@ -282,7 +281,6 @@ static const struct ethqos_emac_driver_data emac_v4_0_0_data = {
.rgmii_config_loopback_en = false,
.has_emac_ge_3 = true,
.link_clk_name = "phyaux",
- .has_integrated_pcs = true,
.needs_sgmii_loopback = true,
.dma_addr_width = 36,
.dwmac4_addrs = {
@@ -848,7 +846,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
plat_dat->fix_mac_speed = ethqos_fix_mac_speed;
plat_dat->dump_debug_regs = rgmii_dump;
plat_dat->ptp_clk_freq_config = ethqos_ptp_clk_freq_config;
- plat_dat->has_gmac4 = 1;
+ plat_dat->core_type = DWMAC_CORE_GMAC4;
if (ethqos->has_emac_ge_3)
plat_dat->dwmac4_addrs = &data->dwmac4_addrs;
plat_dat->pmt = 1;
@@ -856,8 +854,6 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
plat_dat->flags |= STMMAC_FLAG_TSO_EN;
if (of_device_is_compatible(np, "qcom,qcs404-ethqos"))
plat_dat->flags |= STMMAC_FLAG_RX_CLK_RUNS_IN_LPI;
- if (data->has_integrated_pcs)
- plat_dat->flags |= STMMAC_FLAG_HAS_INTEGRATED_PCS;
if (data->dma_addr_width)
plat_dat->host_dma_width = data->dma_addr_width;

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index 0786816e05f0..643578266dfc 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -1751,8 +1751,8 @@ static int rk_gmac_probe(struct platform_device *pdev)
/* If the stmmac is not already selected as gmac4,
* then make sure we fallback to gmac.
*/
- if (!plat_dat->has_gmac4) {
- plat_dat->has_gmac = true;
+ if (plat_dat->core_type != DWMAC_CORE_GMAC4) {
+ plat_dat->core_type = DWMAC_CORE_GMAC;
plat_dat->rx_fifo_size = 4096;
plat_dat->tx_fifo_size = 2048;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c
index 221539d760bc..ee095ac13203 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c
@@ -146,7 +146,7 @@ static int s32_dwmac_probe(struct platform_device *pdev)
gmac->ioaddr = res.addr;

/* S32CC core feature set */
- plat->has_gmac4 = true;
+ plat->core_type = DWMAC_CORE_GMAC4;
plat->pmt = 1;
plat->flags |= STMMAC_FLAG_SPH_DISABLE;
plat->rx_fifo_size = 20480;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
index 354f01184e6c..2ff5db6d41ca 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
@@ -497,7 +497,7 @@ static int socfpga_dwmac_probe(struct platform_device *pdev)
plat_dat->pcs_init = socfpga_dwmac_pcs_init;
plat_dat->pcs_exit = socfpga_dwmac_pcs_exit;
plat_dat->select_pcs = socfpga_dwmac_select_pcs;
- plat_dat->has_gmac = true;
+ plat_dat->core_type = DWMAC_CORE_GMAC;

plat_dat->riwt_off = 1;

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
index 1eadcf5d1ad6..7f560d78209d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
@@ -136,7 +136,7 @@ static int sun7i_gmac_probe(struct platform_device *pdev)
/* platform data specifying hardware features and callbacks.
* hardware features were copied from Allwinner drivers. */
plat_dat->tx_coe = 1;
- plat_dat->has_gmac = true;
+ plat_dat->core_type = DWMAC_CORE_GMAC;
plat_dat->bsp_priv = gmac;
plat_dat->init = sun7i_gmac_init;
plat_dat->exit = sun7i_gmac_exit;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c
index dc903b846b1b..d765acbe3754 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c
@@ -308,7 +308,7 @@ static int tegra_mgbe_probe(struct platform_device *pdev)
goto disable_clks;
}

- plat->has_xgmac = 1;
+ plat->core_type = DWMAC_CORE_XGMAC;
plat->flags |= STMMAC_FLAG_TSO_EN;
plat->pmt = 1;
plat->bsp_priv = mgbe;
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c
index 3f7c765dcb79..00083ce52549 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.c
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c
@@ -106,9 +106,7 @@ int stmmac_reset(struct stmmac_priv *priv, void __iomem *ioaddr)
}

static const struct stmmac_hwif_entry {
- bool gmac;
- bool gmac4;
- bool xgmac;
+ enum dwmac_core_type core_type;
u32 min_id;
u32 dev_id;
const struct stmmac_regs_off regs;
@@ -127,9 +125,7 @@ static const struct stmmac_hwif_entry {
} stmmac_hw[] = {
/* NOTE: New HW versions shall go to the end of this table */
{
- .gmac = false,
- .gmac4 = false,
- .xgmac = false,
+ .core_type = DWMAC_CORE_MAC100,
.min_id = 0,
.regs = {
.ptp_off = PTP_GMAC3_X_OFFSET,
@@ -146,9 +142,7 @@ static const struct stmmac_hwif_entry {
.setup = dwmac100_setup,
.quirks = stmmac_dwmac1_quirks,
}, {
- .gmac = true,
- .gmac4 = false,
- .xgmac = false,
+ .core_type = DWMAC_CORE_GMAC,
.min_id = 0,
.regs = {
.ptp_off = PTP_GMAC3_X_OFFSET,
@@ -165,9 +159,7 @@ static const struct stmmac_hwif_entry {
.setup = dwmac1000_setup,
.quirks = stmmac_dwmac1_quirks,
}, {
- .gmac = false,
- .gmac4 = true,
- .xgmac = false,
+ .core_type = DWMAC_CORE_GMAC4,
.min_id = 0,
.regs = {
.ptp_off = PTP_GMAC4_OFFSET,
@@ -187,9 +179,7 @@ static const struct stmmac_hwif_entry {
.setup = dwmac4_setup,
.quirks = stmmac_dwmac4_quirks,
}, {
- .gmac = false,
- .gmac4 = true,
- .xgmac = false,
+ .core_type = DWMAC_CORE_GMAC4,
.min_id = DWMAC_CORE_4_00,
.regs = {
.ptp_off = PTP_GMAC4_OFFSET,
@@ -210,9 +200,7 @@ static const struct stmmac_hwif_entry {
.setup = dwmac4_setup,
.quirks = NULL,
}, {
- .gmac = false,
- .gmac4 = true,
- .xgmac = false,
+ .core_type = DWMAC_CORE_GMAC4,
.min_id = DWMAC_CORE_4_10,
.regs = {
.ptp_off = PTP_GMAC4_OFFSET,
@@ -233,9 +221,7 @@ static const struct stmmac_hwif_entry {
.setup = dwmac4_setup,
.quirks = NULL,
}, {
- .gmac = false,
- .gmac4 = true,
- .xgmac = false,
+ .core_type = DWMAC_CORE_GMAC4,
.min_id = DWMAC_CORE_5_10,
.regs = {
.ptp_off = PTP_GMAC4_OFFSET,
@@ -256,9 +242,7 @@ static const struct stmmac_hwif_entry {
.setup = dwmac4_setup,
.quirks = NULL,
}, {
- .gmac = false,
- .gmac4 = false,
- .xgmac = true,
+ .core_type = DWMAC_CORE_XGMAC,
.min_id = DWXGMAC_CORE_2_10,
.dev_id = DWXGMAC_ID,
.regs = {
@@ -280,9 +264,7 @@ static const struct stmmac_hwif_entry {
.setup = dwxgmac2_setup,
.quirks = NULL,
}, {
- .gmac = false,
- .gmac4 = false,
- .xgmac = true,
+ .core_type = DWMAC_CORE_XGMAC,
.min_id = DWXLGMAC_CORE_2_00,
.dev_id = DWXLGMAC_ID,
.regs = {
@@ -308,20 +290,18 @@ static const struct stmmac_hwif_entry {

int stmmac_hwif_init(struct stmmac_priv *priv)
{
- bool needs_xgmac = priv->plat->has_xgmac;
- bool needs_gmac4 = priv->plat->has_gmac4;
- bool needs_gmac = priv->plat->has_gmac;
+ enum dwmac_core_type core_type = priv->plat->core_type;
const struct stmmac_hwif_entry *entry;
struct mac_device_info *mac;
bool needs_setup = true;
u32 id, dev_id = 0;
int i, ret;

- if (needs_gmac) {
+ if (core_type == DWMAC_CORE_GMAC) {
id = stmmac_get_id(priv, GMAC_VERSION);
- } else if (needs_gmac4 || needs_xgmac) {
+ } else if (dwmac_is_xmac(core_type)) {
id = stmmac_get_id(priv, GMAC4_VERSION);
- if (needs_xgmac)
+ if (core_type == DWMAC_CORE_XGMAC)
dev_id = stmmac_get_dev_id(priv, GMAC4_VERSION);
} else {
id = 0;
@@ -331,14 +311,16 @@ int stmmac_hwif_init(struct stmmac_priv *priv)
priv->synopsys_id = id;

/* Lets assume some safe values first */
- priv->ptpaddr = priv->ioaddr +
- (needs_gmac4 ? PTP_GMAC4_OFFSET : PTP_GMAC3_X_OFFSET);
- priv->mmcaddr = priv->ioaddr +
- (needs_gmac4 ? MMC_GMAC4_OFFSET : MMC_GMAC3_X_OFFSET);
- if (needs_gmac4)
+ if (core_type == DWMAC_CORE_GMAC4) {
+ priv->ptpaddr = priv->ioaddr + PTP_GMAC4_OFFSET;
+ priv->mmcaddr = priv->ioaddr + MMC_GMAC4_OFFSET;
priv->estaddr = priv->ioaddr + EST_GMAC4_OFFSET;
- else if (needs_xgmac)
- priv->estaddr = priv->ioaddr + EST_XGMAC_OFFSET;
+ } else {
+ priv->ptpaddr = priv->ioaddr + PTP_GMAC3_X_OFFSET;
+ priv->mmcaddr = priv->ioaddr + MMC_GMAC3_X_OFFSET;
+ if (core_type == DWMAC_CORE_XGMAC)
+ priv->estaddr = priv->ioaddr + EST_XGMAC_OFFSET;
+ }

/* Check for HW specific setup first */
if (priv->plat->setup) {
@@ -355,16 +337,12 @@ int stmmac_hwif_init(struct stmmac_priv *priv)
for (i = ARRAY_SIZE(stmmac_hw) - 1; i >= 0; i--) {
entry = &stmmac_hw[i];

- if (needs_gmac ^ entry->gmac)
- continue;
- if (needs_gmac4 ^ entry->gmac4)
- continue;
- if (needs_xgmac ^ entry->xgmac)
+ if (core_type != entry->core_type)
continue;
/* Use synopsys_id var because some setups can override this */
if (priv->synopsys_id < entry->min_id)
continue;
- if (needs_xgmac && (dev_id ^ entry->dev_id))
+ if (core_type == DWMAC_CORE_XGMAC && (dev_id ^ entry->dev_id))
continue;

/* Only use generic HW helpers if needed */
@@ -400,6 +378,7 @@ int stmmac_hwif_init(struct stmmac_priv *priv)
}

dev_err(priv->device, "Failed to find HW IF (id=0x%x, gmac=%d/%d)\n",
- id, needs_gmac, needs_gmac4);
+ id, core_type == DWMAC_CORE_GMAC,
+ core_type == DWMAC_CORE_GMAC4);
return -EINVAL;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c
index 4b513d27a988..afc516059b89 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c
@@ -53,7 +53,7 @@ static int est_configure(struct stmmac_priv *priv, struct stmmac_est *cfg,
}

ctrl = readl(est_addr + EST_CONTROL);
- if (priv->plat->has_xgmac) {
+ if (priv->plat->core_type == DWMAC_CORE_XGMAC) {
ctrl &= ~EST_XGMAC_PTOV;
ctrl |= ((NSEC_PER_SEC / ptp_rate) * EST_XGMAC_PTOV_MUL) <<
EST_XGMAC_PTOV_SHIFT;
@@ -148,7 +148,7 @@ static void est_irq_status(struct stmmac_priv *priv, struct net_device *dev,
}

if (status & EST_BTRE) {
- if (priv->plat->has_xgmac) {
+ if (priv->plat->core_type == DWMAC_CORE_XGMAC) {
btrl = FIELD_GET(EST_XGMAC_BTRL, status);
btrl_max = FIELD_MAX(EST_XGMAC_BTRL);
} else {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 39fa1ec92f82..81d4039e1c08 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -303,9 +303,10 @@ static void stmmac_ethtool_getdrvinfo(struct net_device *dev,
{
struct stmmac_priv *priv = netdev_priv(dev);

- if (priv->plat->has_gmac || priv->plat->has_gmac4)
+ if (priv->plat->core_type == DWMAC_CORE_GMAC ||
+ priv->plat->core_type == DWMAC_CORE_GMAC4)
strscpy(info->driver, GMAC_ETHTOOL_NAME, sizeof(info->driver));
- else if (priv->plat->has_xgmac)
+ else if (priv->plat->core_type == DWMAC_CORE_XGMAC)
strscpy(info->driver, XGMAC_ETHTOOL_NAME, sizeof(info->driver));
else
strscpy(info->driver, MAC100_ETHTOOL_NAME,
@@ -322,47 +323,6 @@ static int stmmac_ethtool_get_link_ksettings(struct net_device *dev,
{
struct stmmac_priv *priv = netdev_priv(dev);

- if (!(priv->plat->flags & STMMAC_FLAG_HAS_INTEGRATED_PCS) &&
- (priv->hw->pcs & STMMAC_PCS_RGMII ||
- priv->hw->pcs & STMMAC_PCS_SGMII)) {
- u32 supported, advertising, lp_advertising;
-
- if (!priv->xstats.pcs_link) {
- cmd->base.speed = SPEED_UNKNOWN;
- cmd->base.duplex = DUPLEX_UNKNOWN;
- return 0;
- }
- cmd->base.duplex = priv->xstats.pcs_duplex;
-
- cmd->base.speed = priv->xstats.pcs_speed;
-
- /* Encoding of PSE bits is defined in 802.3z, 37.2.1.4 */
-
- ethtool_convert_link_mode_to_legacy_u32(
- &supported, cmd->link_modes.supported);
- ethtool_convert_link_mode_to_legacy_u32(
- &advertising, cmd->link_modes.advertising);
- ethtool_convert_link_mode_to_legacy_u32(
- &lp_advertising, cmd->link_modes.lp_advertising);
-
- /* Reg49[3] always set because ANE is always supported */
- cmd->base.autoneg = ADVERTISED_Autoneg;
- supported |= SUPPORTED_Autoneg;
- advertising |= ADVERTISED_Autoneg;
- lp_advertising |= ADVERTISED_Autoneg;
-
- cmd->base.port = PORT_OTHER;
-
- ethtool_convert_legacy_u32_to_link_mode(
- cmd->link_modes.supported, supported);
- ethtool_convert_legacy_u32_to_link_mode(
- cmd->link_modes.advertising, advertising);
- ethtool_convert_legacy_u32_to_link_mode(
- cmd->link_modes.lp_advertising, lp_advertising);
-
- return 0;
- }
-
return phylink_ethtool_ksettings_get(priv->phylink, cmd);
}

@@ -372,20 +332,6 @@ stmmac_ethtool_set_link_ksettings(struct net_device *dev,
{
struct stmmac_priv *priv = netdev_priv(dev);

- if (!(priv->plat->flags & STMMAC_FLAG_HAS_INTEGRATED_PCS) &&
- (priv->hw->pcs & STMMAC_PCS_RGMII ||
- priv->hw->pcs & STMMAC_PCS_SGMII)) {
- /* Only support ANE */
- if (cmd->base.autoneg != AUTONEG_ENABLE)
- return -EINVAL;
-
- mutex_lock(&priv->lock);
- stmmac_pcs_ctrl_ane(priv, 1, priv->hw->ps, 0);
- mutex_unlock(&priv->lock);
-
- return 0;
- }
-
return phylink_ethtool_ksettings_set(priv->phylink, cmd);
}

@@ -406,9 +352,9 @@ static int stmmac_ethtool_get_regs_len(struct net_device *dev)
{
struct stmmac_priv *priv = netdev_priv(dev);

- if (priv->plat->has_xgmac)
+ if (priv->plat->core_type == DWMAC_CORE_XGMAC)
return XGMAC_REGSIZE * 4;
- else if (priv->plat->has_gmac4)
+ else if (priv->plat->core_type == DWMAC_CORE_GMAC4)
return GMAC4_REG_SPACE_SIZE;
return REG_SPACE_SIZE;
}
@@ -423,12 +369,12 @@ static void stmmac_ethtool_gregs(struct net_device *dev,
stmmac_dump_dma_regs(priv, priv->ioaddr, reg_space);

/* Copy DMA registers to where ethtool expects them */
- if (priv->plat->has_gmac4) {
+ if (priv->plat->core_type == DWMAC_CORE_GMAC4) {
/* GMAC4 dumps its DMA registers at its DMA_CHAN_BASE_ADDR */
memcpy(&reg_space[ETHTOOL_DMA_OFFSET],
&reg_space[GMAC4_DMA_CHAN_BASE_ADDR / 4],
NUM_DWMAC4_DMA_REGS * 4);
- } else if (!priv->plat->has_xgmac) {
+ } else if (priv->plat->core_type != DWMAC_CORE_XGMAC) {
memcpy(&reg_space[ETHTOOL_DMA_OFFSET],
&reg_space[DMA_BUS_MODE / 4],
NUM_DWMAC1000_DMA_REGS * 4);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 0dd17179c85d..46299b7925b4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -446,7 +446,7 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p,
if (!priv->hwts_rx_en)
return;
/* For GMAC4, the valid timestamp is from CTX next desc. */
- if (priv->plat->has_gmac4 || priv->plat->has_xgmac)
+ if (dwmac_is_xmac(priv->plat->core_type))
desc = np;

/* Check if timestamp is available */
@@ -697,7 +697,7 @@ static int stmmac_hwtstamp_get(struct net_device *dev,
static int stmmac_init_tstamp_counter(struct stmmac_priv *priv,
u32 systime_flags)
{
- bool xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac;
+ bool xmac = dwmac_is_xmac(priv->plat->core_type);
struct timespec64 now;
u32 sec_inc = 0;
u64 temp = 0;
@@ -746,7 +746,7 @@ static int stmmac_init_tstamp_counter(struct stmmac_priv *priv,
*/
static int stmmac_init_timestamping(struct stmmac_priv *priv)
{
- bool xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac;
+ bool xmac = dwmac_is_xmac(priv->plat->core_type);
int ret;

if (priv->plat->ptp_clk_freq_config)
@@ -2398,7 +2398,7 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
txfifosz = priv->dma_cap.tx_fifo_size;

/* Split up the shared Tx/Rx FIFO memory on DW QoS Eth and DW XGMAC */
- if (priv->plat->has_gmac4 || priv->plat->has_xgmac) {
+ if (dwmac_is_xmac(priv->plat->core_type)) {
rxfifosz /= rx_channels_count;
txfifosz /= tx_channels_count;
}
@@ -4514,7 +4514,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
if (skb_is_gso(skb) && priv->tso) {
if (gso & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))
return stmmac_tso_xmit(skb, dev);
- if (priv->plat->has_gmac4 && (gso & SKB_GSO_UDP_L4))
+ if (priv->plat->core_type == DWMAC_CORE_GMAC4 &&
+ (gso & SKB_GSO_UDP_L4))
return stmmac_tso_xmit(skb, dev);
}

@@ -4879,13 +4880,27 @@ static unsigned int stmmac_rx_buf2_len(struct stmmac_priv *priv,
if (!priv->sph)
return 0;

- /* Not last descriptor */
- if (status & rx_not_ls)
+ /* For GMAC4, when split header is enabled, in some rare cases, the
+ * hardware does not fill buf2 of the first descriptor with payload.
+ * Thus we cannot assume buf2 is always fully filled if it is not
+ * the last descriptor. Otherwise, the length of buf2 of the second
+ * descriptor will be calculated wrong and cause an oops.
+ *
+ * If this is the last descriptor, 'plen' is the length of the
+ * received packet that was transferred to system memory.
+ * Otherwise, it is the accumulated number of bytes that have been
+ * transferred for the current packet.
+ *
+ * Thus 'plen - len' always gives the correct length of buf2.
+ */
+
+ /* Not GMAC4 and not last descriptor */
+ if (priv->plat->core_type != DWMAC_CORE_GMAC4 && (status & rx_not_ls))
return priv->dma_conf.dma_buf_sz;

+ /* GMAC4 or last descriptor */
plen = stmmac_get_rx_frame_len(priv, p, coe);

- /* Last descriptor */
return plen - len;
}

@@ -5984,7 +5999,7 @@ static void stmmac_common_interrupt(struct stmmac_priv *priv)
u32 queue;
bool xmac;

- xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac;
+ xmac = dwmac_is_xmac(priv->plat->core_type);
queues_count = (rx_cnt > tx_cnt) ? rx_cnt : tx_cnt;

if (priv->irq_wake)
@@ -5998,7 +6013,7 @@ static void stmmac_common_interrupt(struct stmmac_priv *priv)
stmmac_fpe_irq_status(priv);

/* To handle GMAC own interrupts */
- if ((priv->plat->has_gmac) || xmac) {
+ if (priv->plat->core_type == DWMAC_CORE_GMAC || xmac) {
int status = stmmac_host_irq_status(priv, priv->hw, &priv->xstats);

if (unlikely(status)) {
@@ -6012,15 +6027,6 @@ static void stmmac_common_interrupt(struct stmmac_priv *priv)
for (queue = 0; queue < queues_count; queue++)
stmmac_host_mtl_irq_status(priv, priv->hw, queue);

- /* PCS link status */
- if (priv->hw->pcs &&
- !(priv->plat->flags & STMMAC_FLAG_HAS_INTEGRATED_PCS)) {
- if (priv->xstats.pcs_link)
- netif_carrier_on(priv->dev);
- else
- netif_carrier_off(priv->dev);
- }
-
stmmac_timestamp_interrupt(priv, priv);
}
}
@@ -6368,7 +6374,7 @@ static int stmmac_dma_cap_show(struct seq_file *seq, void *v)
(priv->dma_cap.mbps_1000) ? "Y" : "N");
seq_printf(seq, "\tHalf duplex: %s\n",
(priv->dma_cap.half_duplex) ? "Y" : "N");
- if (priv->plat->has_xgmac) {
+ if (priv->plat->core_type == DWMAC_CORE_XGMAC) {
seq_printf(seq,
"\tNumber of Additional MAC address registers: %d\n",
priv->dma_cap.multi_addr);
@@ -6392,7 +6398,7 @@ static int stmmac_dma_cap_show(struct seq_file *seq, void *v)
(priv->dma_cap.time_stamp) ? "Y" : "N");
seq_printf(seq, "\tIEEE 1588-2008 Advanced Time Stamp: %s\n",
(priv->dma_cap.atime_stamp) ? "Y" : "N");
- if (priv->plat->has_xgmac)
+ if (priv->plat->core_type == DWMAC_CORE_XGMAC)
seq_printf(seq, "\tTimestamp System Time Source: %s\n",
dwxgmac_timestamp_source[priv->dma_cap.tssrc]);
seq_printf(seq, "\t802.3az - Energy-Efficient Ethernet (EEE): %s\n",
@@ -6401,7 +6407,7 @@ static int stmmac_dma_cap_show(struct seq_file *seq, void *v)
seq_printf(seq, "\tChecksum Offload in TX: %s\n",
(priv->dma_cap.tx_coe) ? "Y" : "N");
if (priv->synopsys_id >= DWMAC_CORE_4_00 ||
- priv->plat->has_xgmac) {
+ priv->plat->core_type == DWMAC_CORE_XGMAC) {
seq_printf(seq, "\tIP Checksum Offload in RX: %s\n",
(priv->dma_cap.rx_coe) ? "Y" : "N");
} else {
@@ -7253,8 +7259,9 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
* has to be disable and this can be done by passing the
* riwt_off field from the platform.
*/
- if (((priv->synopsys_id >= DWMAC_CORE_3_50) ||
- (priv->plat->has_xgmac)) && (!priv->plat->riwt_off)) {
+ if ((priv->synopsys_id >= DWMAC_CORE_3_50 ||
+ priv->plat->core_type == DWMAC_CORE_XGMAC) &&
+ !priv->plat->riwt_off) {
priv->use_riwt = 1;
dev_info(priv->device,
"Enable RX Mitigation via HW Watchdog Timer\n");
@@ -7368,7 +7375,7 @@ static int stmmac_xdp_rx_timestamp(const struct xdp_md *_ctx, u64 *timestamp)
return -ENODATA;

/* For GMAC4, the valid timestamp is from CTX next desc. */
- if (priv->plat->has_gmac4 || priv->plat->has_xgmac)
+ if (dwmac_is_xmac(priv->plat->core_type))
desc_contains_ts = ndesc;

/* Check if timestamp is available */
@@ -7524,7 +7531,7 @@ int stmmac_dvr_probe(struct device *device,

if ((priv->plat->flags & STMMAC_FLAG_TSO_EN) && (priv->dma_cap.tsoen)) {
ndev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
- if (priv->plat->has_gmac4)
+ if (priv->plat->core_type == DWMAC_CORE_GMAC4)
ndev->hw_features |= NETIF_F_GSO_UDP_L4;
priv->tso = true;
dev_info(priv->device, "TSO feature enabled\n");
@@ -7577,7 +7584,7 @@ int stmmac_dvr_probe(struct device *device,
#ifdef STMMAC_VLAN_TAG_USED
/* Both mac100 and gmac support receive VLAN tag detection */
ndev->features |= NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX;
- if (priv->plat->has_gmac4 || priv->plat->has_xgmac) {
+ if (dwmac_is_xmac(priv->plat->core_type)) {
ndev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
priv->hw->hw_vlan_en = true;
}
@@ -7605,7 +7612,7 @@ int stmmac_dvr_probe(struct device *device,

/* MTU range: 46 - hw-specific max */
ndev->min_mtu = ETH_ZLEN - ETH_HLEN;
- if (priv->plat->has_xgmac)
+ if (priv->plat->core_type == DWMAC_CORE_XGMAC)
ndev->max_mtu = XGMAC_JUMBO_LEN;
else if ((priv->plat->enh_desc) || (priv->synopsys_id >= DWMAC_CORE_4_00))
ndev->max_mtu = JUMBO_LEN;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index f408737f6fc7..2b55b02de380 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -301,7 +301,7 @@ static int stmmac_mdio_read_c22(struct mii_bus *bus, int phyaddr, int phyreg)
struct stmmac_priv *priv = netdev_priv(bus->priv);
u32 cmd;

- if (priv->plat->has_gmac4)
+ if (priv->plat->core_type == DWMAC_CORE_GMAC4)
cmd = MII_GMAC4_READ;
else
cmd = 0;
@@ -344,7 +344,7 @@ static int stmmac_mdio_write_c22(struct mii_bus *bus, int phyaddr, int phyreg,
struct stmmac_priv *priv = netdev_priv(bus->priv);
u32 cmd;

- if (priv->plat->has_gmac4)
+ if (priv->plat->core_type == DWMAC_CORE_GMAC4)
cmd = MII_GMAC4_WRITE;
else
cmd = MII_ADDR_GWRITE;
@@ -417,7 +417,7 @@ int stmmac_mdio_reset(struct mii_bus *bus)
* on MDC, so perform a dummy mdio read. To be updated for GMAC4
* if needed.
*/
- if (!priv->plat->has_gmac4)
+ if (priv->plat->core_type != DWMAC_CORE_GMAC4)
writel(0, priv->ioaddr + mii_address);
#endif
return 0;
@@ -528,7 +528,7 @@ static u32 stmmac_clk_csr_set(struct stmmac_priv *priv)
value = 0;
}

- if (priv->plat->has_xgmac) {
+ if (priv->plat->core_type == DWMAC_CORE_XGMAC) {
if (clk_rate > 400000000)
value = 0x5;
else if (clk_rate > 350000000)
@@ -600,7 +600,7 @@ int stmmac_mdio_register(struct net_device *ndev)

new_bus->name = "stmmac";

- if (priv->plat->has_xgmac) {
+ if (priv->plat->core_type == DWMAC_CORE_XGMAC) {
new_bus->read = &stmmac_xgmac2_mdio_read_c22;
new_bus->write = &stmmac_xgmac2_mdio_write_c22;
new_bus->read_c45 = &stmmac_xgmac2_mdio_read_c45;
@@ -621,7 +621,7 @@ int stmmac_mdio_register(struct net_device *ndev)
} else {
new_bus->read = &stmmac_mdio_read_c22;
new_bus->write = &stmmac_mdio_write_c22;
- if (priv->plat->has_gmac4) {
+ if (priv->plat->core_type == DWMAC_CORE_GMAC4) {
new_bus->read_c45 = &stmmac_mdio_read_c45;
new_bus->write_c45 = &stmmac_mdio_write_c45;
}
@@ -649,7 +649,7 @@ int stmmac_mdio_register(struct net_device *ndev)
}

/* Looks like we need a dummy read for XGMAC only and C45 PHYs */
- if (priv->plat->has_xgmac)
+ if (priv->plat->core_type == DWMAC_CORE_XGMAC)
stmmac_xgmac2_mdio_read_c45(new_bus, 0, 0, 0);

/* If fixed-link is set, skip PHY scanning */
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
index 4e3aa611fda8..94b3a3b27270 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -23,7 +23,7 @@ static void common_default_data(struct plat_stmmacenet_data *plat)
{
/* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */
plat->clk_csr = STMMAC_CSR_20_35M;
- plat->has_gmac = 1;
+ plat->core_type = DWMAC_CORE_GMAC;
plat->force_sf_dma_mode = 1;

plat->mdio_bus_data->needs_reset = true;
@@ -76,7 +76,7 @@ static int snps_gmac5_default_data(struct pci_dev *pdev,
int i;

plat->clk_csr = STMMAC_CSR_250_300M;
- plat->has_gmac4 = 1;
+ plat->core_type = DWMAC_CORE_GMAC4;
plat->force_sf_dma_mode = 1;
plat->flags |= STMMAC_FLAG_TSO_EN;
plat->pmt = 1;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 27bcaae07a7f..fbb92cc6ab59 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -552,12 +552,12 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
&pdev->dev, plat->unicast_filter_entries);
plat->multicast_filter_bins = dwmac1000_validate_mcast_bins(
&pdev->dev, plat->multicast_filter_bins);
- plat->has_gmac = 1;
+ plat->core_type = DWMAC_CORE_GMAC;
plat->pmt = 1;
}

if (of_device_is_compatible(np, "snps,dwmac-3.40a")) {
- plat->has_gmac = 1;
+ plat->core_type = DWMAC_CORE_GMAC;
plat->enh_desc = 1;
plat->tx_coe = 1;
plat->bugged_jumbo = 1;
@@ -565,8 +565,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
}

if (of_device_compatible_match(np, stmmac_gmac4_compats)) {
- plat->has_gmac4 = 1;
- plat->has_gmac = 0;
+ plat->core_type = DWMAC_CORE_GMAC4;
plat->pmt = 1;
if (of_property_read_bool(np, "snps,tso"))
plat->flags |= STMMAC_FLAG_TSO_EN;
@@ -580,7 +579,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
}

if (of_device_is_compatible(np, "snps,dwxgmac")) {
- plat->has_xgmac = 1;
+ plat->core_type = DWMAC_CORE_XGMAC;
plat->pmt = 1;
if (of_property_read_bool(np, "snps,tso"))
plat->flags |= STMMAC_FLAG_TSO_EN;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
index 993ff4e87e55..3e30172fa129 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
@@ -57,7 +57,7 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta)
bool xmac, est_rst = false;
int ret;

- xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac;
+ xmac = dwmac_is_xmac(priv->plat->core_type);

if (delta < 0) {
neg_adj = 1;
@@ -344,7 +344,7 @@ void stmmac_ptp_register(struct stmmac_priv *priv)

/* Calculate the clock domain crossing (CDC) error if necessary */
priv->plat->cdc_error_adj = 0;
- if (priv->plat->has_gmac4)
+ if (priv->plat->core_type == DWMAC_CORE_GMAC4)
priv->plat->cdc_error_adj = (2 * NSEC_PER_SEC) / priv->plat->clk_ptp_rate;

/* Update the ptp clock parameters based on feature discovery, when
diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c
index 48f0a96c0e9e..666998082998 100644
--- a/drivers/net/ethernet/sun/sunhme.c
+++ b/drivers/net/ethernet/sun/sunhme.c
@@ -2551,6 +2551,9 @@ static int happy_meal_sbus_probe_one(struct platform_device *op, int is_qfe)
goto err_out_clear_quattro;
}

+ /* BIGMAC may have bogus sizes */
+ if ((op->resource[3].end - op->resource[3].start) >= BMAC_REG_SIZE)
+ op->resource[3].end = op->resource[3].start + BMAC_REG_SIZE - 1;
hp->bigmacregs = devm_platform_ioremap_resource(op, 3);
if (IS_ERR(hp->bigmacregs)) {
dev_err(&op->dev, "Cannot map BIGMAC registers.\n");
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index c509228be84d..4433b8e95b6a 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -1572,6 +1572,11 @@ int macvlan_common_newlink(struct net_device *dev,
if (create)
macvlan_port_destroy(port->dev);
}
+ /* @dev might have been made visible before an error was detected.
+ * Make sure to observe an RCU grace period before our caller
+ * (rtnl_newlink()) frees it.
+ */
+ synchronize_net();
return err;
}
EXPORT_SYMBOL_GPL(macvlan_common_newlink);
diff --git a/drivers/net/mctp/mctp-i2c.c b/drivers/net/mctp/mctp-i2c.c
index f782d93f826e..8043b57bdf25 100644
--- a/drivers/net/mctp/mctp-i2c.c
+++ b/drivers/net/mctp/mctp-i2c.c
@@ -242,6 +242,12 @@ static int mctp_i2c_slave_cb(struct i2c_client *client,
return 0;

switch (event) {
+ case I2C_SLAVE_READ_REQUESTED:
+ case I2C_SLAVE_READ_PROCESSED:
+ /* MCTP I2C transport only uses writes */
+ midev->rx_pos = 0;
+ *val = 0xff;
+ break;
case I2C_SLAVE_WRITE_RECEIVED:
if (midev->rx_pos < MCTP_I2C_BUFSZ) {
midev->rx_buffer[midev->rx_pos] = *val;
@@ -279,6 +285,9 @@ static int mctp_i2c_recv(struct mctp_i2c_dev *midev)
size_t recvlen;
int status;

+ if (midev->rx_pos == 0)
+ return 0;
+
/* + 1 for the PEC */
if (midev->rx_pos < MCTP_I2C_MINLEN + 1) {
ndev->stats.rx_length_errors++;
diff --git a/drivers/net/ovpn/io.c b/drivers/net/ovpn/io.c
index 3e9e7f8444b3..955c9a37e1f8 100644
--- a/drivers/net/ovpn/io.c
+++ b/drivers/net/ovpn/io.c
@@ -355,6 +355,7 @@ netdev_tx_t ovpn_net_xmit(struct sk_buff *skb, struct net_device *dev)
struct ovpn_priv *ovpn = netdev_priv(dev);
struct sk_buff *segments, *curr, *next;
struct sk_buff_head skb_list;
+ unsigned int tx_bytes = 0;
struct ovpn_peer *peer;
__be16 proto;
int ret;
@@ -365,7 +366,27 @@ netdev_tx_t ovpn_net_xmit(struct sk_buff *skb, struct net_device *dev)
/* verify IP header size in network packet */
proto = ovpn_ip_check_protocol(skb);
if (unlikely(!proto || skb->protocol != proto))
- goto drop;
+ goto drop_no_peer;
+
+ /* retrieve peer serving the destination IP of this packet */
+ peer = ovpn_peer_get_by_dst(ovpn, skb);
+ if (unlikely(!peer)) {
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ net_dbg_ratelimited("%s: no peer to send data to dst=%pI4\n",
+ netdev_name(ovpn->dev),
+ &ip_hdr(skb)->daddr);
+ break;
+ case htons(ETH_P_IPV6):
+ net_dbg_ratelimited("%s: no peer to send data to dst=%pI6c\n",
+ netdev_name(ovpn->dev),
+ &ipv6_hdr(skb)->daddr);
+ break;
+ }
+ goto drop_no_peer;
+ }
+ /* dst was needed for peer selection - it can now be dropped */
+ skb_dst_drop(skb);

if (skb_is_gso(skb)) {
segments = skb_gso_segment(skb, 0);
@@ -394,36 +415,28 @@ netdev_tx_t ovpn_net_xmit(struct sk_buff *skb, struct net_device *dev)
continue;
}

+ /* only count what we actually send */
+ tx_bytes += curr->len;
__skb_queue_tail(&skb_list, curr);
}
- skb_list.prev->next = NULL;

- /* retrieve peer serving the destination IP of this packet */
- peer = ovpn_peer_get_by_dst(ovpn, skb);
- if (unlikely(!peer)) {
- switch (skb->protocol) {
- case htons(ETH_P_IP):
- net_dbg_ratelimited("%s: no peer to send data to dst=%pI4\n",
- netdev_name(ovpn->dev),
- &ip_hdr(skb)->daddr);
- break;
- case htons(ETH_P_IPV6):
- net_dbg_ratelimited("%s: no peer to send data to dst=%pI6c\n",
- netdev_name(ovpn->dev),
- &ipv6_hdr(skb)->daddr);
- break;
- }
- goto drop;
+ /* no segments survived: don't jump to 'drop' because we already
+ * incremented the counter for each failure in the loop
+ */
+ if (unlikely(skb_queue_empty(&skb_list))) {
+ ovpn_peer_put(peer);
+ return NETDEV_TX_OK;
}
- /* dst was needed for peer selection - it can now be dropped */
- skb_dst_drop(skb);
+ skb_list.prev->next = NULL;

- ovpn_peer_stats_increment_tx(&peer->vpn_stats, skb->len);
+ ovpn_peer_stats_increment_tx(&peer->vpn_stats, tx_bytes);
ovpn_send(ovpn, skb_list.next, peer);

return NETDEV_TX_OK;

drop:
+ ovpn_peer_put(peer);
+drop_no_peer:
dev_dstats_tx_dropped(ovpn->dev);
skb_tx_error(skb);
kfree_skb_list(skb);
diff --git a/drivers/net/ovpn/socket.c b/drivers/net/ovpn/socket.c
index 9750871ab65c..448cee3b3f9f 100644
--- a/drivers/net/ovpn/socket.c
+++ b/drivers/net/ovpn/socket.c
@@ -200,6 +200,22 @@ struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer)
ovpn_sock->sk = sk;
kref_init(&ovpn_sock->refcount);

+ /* TCP sockets are per-peer, therefore they are linked to their unique
+ * peer
+ */
+ if (sk->sk_protocol == IPPROTO_TCP) {
+ INIT_WORK(&ovpn_sock->tcp_tx_work, ovpn_tcp_tx_work);
+ ovpn_sock->peer = peer;
+ ovpn_peer_hold(peer);
+ } else if (sk->sk_protocol == IPPROTO_UDP) {
+ /* in UDP we only link the ovpn instance since the socket is
+ * shared among multiple peers
+ */
+ ovpn_sock->ovpn = peer->ovpn;
+ netdev_hold(peer->ovpn->dev, &ovpn_sock->dev_tracker,
+ GFP_KERNEL);
+ }
+
/* the newly created ovpn_socket is holding reference to sk,
* therefore we increase its refcounter.
*
@@ -212,29 +228,16 @@ struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer)

ret = ovpn_socket_attach(ovpn_sock, sock, peer);
if (ret < 0) {
+ if (sk->sk_protocol == IPPROTO_TCP)
+ ovpn_peer_put(peer);
+ else if (sk->sk_protocol == IPPROTO_UDP)
+ netdev_put(peer->ovpn->dev, &ovpn_sock->dev_tracker);
+
sock_put(sk);
kfree(ovpn_sock);
ovpn_sock = ERR_PTR(ret);
- goto sock_release;
- }
-
- /* TCP sockets are per-peer, therefore they are linked to their unique
- * peer
- */
- if (sk->sk_protocol == IPPROTO_TCP) {
- INIT_WORK(&ovpn_sock->tcp_tx_work, ovpn_tcp_tx_work);
- ovpn_sock->peer = peer;
- ovpn_peer_hold(peer);
- } else if (sk->sk_protocol == IPPROTO_UDP) {
- /* in UDP we only link the ovpn instance since the socket is
- * shared among multiple peers
- */
- ovpn_sock->ovpn = peer->ovpn;
- netdev_hold(peer->ovpn->dev, &ovpn_sock->dev_tracker,
- GFP_KERNEL);
}

- rcu_assign_sk_user_data(sk, ovpn_sock);
sock_release:
release_sock(sk);
return ovpn_sock;
diff --git a/drivers/net/ovpn/tcp.c b/drivers/net/ovpn/tcp.c
index 0d7f30360d87..ec2bbc28c196 100644
--- a/drivers/net/ovpn/tcp.c
+++ b/drivers/net/ovpn/tcp.c
@@ -199,7 +199,19 @@ void ovpn_tcp_socket_detach(struct ovpn_socket *ovpn_sock)
sk->sk_data_ready = peer->tcp.sk_cb.sk_data_ready;
sk->sk_write_space = peer->tcp.sk_cb.sk_write_space;
sk->sk_prot = peer->tcp.sk_cb.prot;
- sk->sk_socket->ops = peer->tcp.sk_cb.ops;
+
+ /* tcp_close() may race this function and could set
+ * sk->sk_socket to NULL. It does so by invoking
+ * sock_orphan(), which holds sk_callback_lock before
+ * doing the assignment.
+ *
+ * For this reason we acquire the same lock to avoid
+ * sk_socket to disappear under our feet
+ */
+ write_lock_bh(&sk->sk_callback_lock);
+ if (sk->sk_socket)
+ sk->sk_socket->ops = peer->tcp.sk_cb.ops;
+ write_unlock_bh(&sk->sk_callback_lock);

rcu_assign_sk_user_data(sk, NULL);
}
@@ -487,6 +499,7 @@ int ovpn_tcp_socket_attach(struct ovpn_socket *ovpn_sock,
/* make sure no pre-existing encapsulation handler exists */
if (ovpn_sock->sk->sk_user_data)
return -EBUSY;
+ rcu_assign_sk_user_data(ovpn_sock->sk, ovpn_sock);

/* only a fully connected socket is expected. Connection should be
* handled in userspace
@@ -495,13 +508,14 @@ int ovpn_tcp_socket_attach(struct ovpn_socket *ovpn_sock,
net_err_ratelimited("%s: provided TCP socket is not in ESTABLISHED state: %d\n",
netdev_name(peer->ovpn->dev),
ovpn_sock->sk->sk_state);
- return -EINVAL;
+ ret = -EINVAL;
+ goto err;
}

ret = strp_init(&peer->tcp.strp, ovpn_sock->sk, &cb);
if (ret < 0) {
DEBUG_NET_WARN_ON_ONCE(1);
- return ret;
+ goto err;
}

INIT_WORK(&peer->tcp.defer_del_work, ovpn_tcp_peer_del_work);
@@ -536,6 +550,9 @@ int ovpn_tcp_socket_attach(struct ovpn_socket *ovpn_sock,
strp_check_rcv(&peer->tcp.strp);

return 0;
+err:
+ rcu_assign_sk_user_data(ovpn_sock->sk, NULL);
+ return ret;
}

static void ovpn_tcp_close(struct sock *sk, long timeout)
diff --git a/drivers/net/ovpn/udp.c b/drivers/net/ovpn/udp.c
index d6a0f7a0b75d..272b535ecaad 100644
--- a/drivers/net/ovpn/udp.c
+++ b/drivers/net/ovpn/udp.c
@@ -386,6 +386,7 @@ int ovpn_udp_socket_attach(struct ovpn_socket *ovpn_sock, struct socket *sock,
struct ovpn_priv *ovpn)
{
struct udp_tunnel_sock_cfg cfg = {
+ .sk_user_data = ovpn_sock,
.encap_type = UDP_ENCAP_OVPNINUDP,
.encap_rcv = ovpn_udp_encap_recv,
.encap_destroy = ovpn_udp_encap_destroy,
diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c
index 6759388692f8..3c824340ffb0 100644
--- a/drivers/net/usb/catc.c
+++ b/drivers/net/usb/catc.c
@@ -64,6 +64,16 @@ static const char driver_name[] = "catc";
#define CTRL_QUEUE 16 /* Max control requests in flight (power of two) */
#define RX_PKT_SZ 1600 /* Max size of receive packet for F5U011 */

+/*
+ * USB endpoints.
+ */
+
+enum catc_usb_ep {
+ CATC_USB_EP_CONTROL = 0,
+ CATC_USB_EP_BULK = 1,
+ CATC_USB_EP_INT_IN = 2,
+};
+
/*
* Control requests.
*/
@@ -772,6 +782,13 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id
u8 broadcast[ETH_ALEN];
u8 *macbuf;
int pktsz, ret = -ENOMEM;
+ static const u8 bulk_ep_addr[] = {
+ CATC_USB_EP_BULK | USB_DIR_OUT,
+ CATC_USB_EP_BULK | USB_DIR_IN,
+ 0};
+ static const u8 int_ep_addr[] = {
+ CATC_USB_EP_INT_IN | USB_DIR_IN,
+ 0};

macbuf = kmalloc(ETH_ALEN, GFP_KERNEL);
if (!macbuf)
@@ -784,6 +801,14 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id
goto fail_mem;
}

+ /* Verify that all required endpoints are present */
+ if (!usb_check_bulk_endpoints(intf, bulk_ep_addr) ||
+ !usb_check_int_endpoints(intf, int_ep_addr)) {
+ dev_err(dev, "Missing or invalid endpoints\n");
+ ret = -ENODEV;
+ goto fail_mem;
+ }
+
netdev = alloc_etherdev(sizeof(struct catc));
if (!netdev)
goto fail_mem;
@@ -828,14 +853,14 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id
usb_fill_control_urb(catc->ctrl_urb, usbdev, usb_sndctrlpipe(usbdev, 0),
NULL, NULL, 0, catc_ctrl_done, catc);

- usb_fill_bulk_urb(catc->tx_urb, usbdev, usb_sndbulkpipe(usbdev, 1),
- NULL, 0, catc_tx_done, catc);
+ usb_fill_bulk_urb(catc->tx_urb, usbdev, usb_sndbulkpipe(usbdev, CATC_USB_EP_BULK),
+ NULL, 0, catc_tx_done, catc);

- usb_fill_bulk_urb(catc->rx_urb, usbdev, usb_rcvbulkpipe(usbdev, 1),
- catc->rx_buf, pktsz, catc_rx_done, catc);
+ usb_fill_bulk_urb(catc->rx_urb, usbdev, usb_rcvbulkpipe(usbdev, CATC_USB_EP_BULK),
+ catc->rx_buf, pktsz, catc_rx_done, catc);

- usb_fill_int_urb(catc->irq_urb, usbdev, usb_rcvintpipe(usbdev, 2),
- catc->irq_buf, 2, catc_irq_done, catc, 1);
+ usb_fill_int_urb(catc->irq_urb, usbdev, usb_rcvintpipe(usbdev, CATC_USB_EP_INT_IN),
+ catc->irq_buf, 2, catc_irq_done, catc, 1);

if (!catc->is_f5u011) {
u32 *buf;
diff --git a/drivers/net/wireless/ath/ath10k/sdio.c b/drivers/net/wireless/ath/ath10k/sdio.c
index c06d50db40b8..00d0556dafef 100644
--- a/drivers/net/wireless/ath/ath10k/sdio.c
+++ b/drivers/net/wireless/ath/ath10k/sdio.c
@@ -2487,7 +2487,11 @@ void ath10k_sdio_fw_crashed_dump(struct ath10k *ar)
if (fast_dump)
ath10k_bmi_start(ar);

+ mutex_lock(&ar->dump_mutex);
+
+ spin_lock_bh(&ar->data_lock);
ar->stats.fw_crash_counter++;
+ spin_unlock_bh(&ar->data_lock);

ath10k_sdio_disable_intrs(ar);

@@ -2505,6 +2509,8 @@ void ath10k_sdio_fw_crashed_dump(struct ath10k *ar)

ath10k_sdio_enable_intrs(ar);

+ mutex_unlock(&ar->dump_mutex);
+
ath10k_core_start_recovery(ar);
}

diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c
index 812686173ac8..06b4df2370e9 100644
--- a/drivers/net/wireless/ath/ath11k/core.c
+++ b/drivers/net/wireless/ath/ath11k/core.c
@@ -997,6 +997,33 @@ static const struct dmi_system_id ath11k_pm_quirk_table[] = {
{}
};

+static const struct __ath11k_core_usecase_firmware_table {
+ u32 hw_rev;
+ const char *compatible;
+ const char *firmware_name;
+} ath11k_core_usecase_firmware_table[] = {
+ { ATH11K_HW_WCN6855_HW21, "qcom,lemans-evk", "nfa765"},
+ { ATH11K_HW_WCN6855_HW21, "qcom,monaco-evk", "nfa765"},
+ { ATH11K_HW_WCN6855_HW21, "qcom,hamoa-iot-evk", "nfa765"},
+ { /* Sentinel */ }
+};
+
+const char *ath11k_core_get_usecase_firmware(struct ath11k_base *ab)
+{
+ const struct __ath11k_core_usecase_firmware_table *entry = NULL;
+
+ entry = ath11k_core_usecase_firmware_table;
+ while (entry->compatible) {
+ if (ab->hw_rev == entry->hw_rev &&
+ of_machine_is_compatible(entry->compatible))
+ return entry->firmware_name;
+ entry++;
+ }
+
+ return NULL;
+}
+EXPORT_SYMBOL(ath11k_core_get_usecase_firmware);
+
void ath11k_fw_stats_pdevs_free(struct list_head *head)
{
struct ath11k_fw_stats_pdev *i, *tmp;
diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h
index e8780b05ce11..834988dad591 100644
--- a/drivers/net/wireless/ath/ath11k/core.h
+++ b/drivers/net/wireless/ath/ath11k/core.h
@@ -1275,6 +1275,7 @@ bool ath11k_core_coldboot_cal_support(struct ath11k_base *ab);

const struct firmware *ath11k_core_firmware_request(struct ath11k_base *ab,
const char *filename);
+const char *ath11k_core_get_usecase_firmware(struct ath11k_base *ab);

static inline const char *ath11k_scan_state_str(enum ath11k_scan_state state)
{
@@ -1329,6 +1330,9 @@ static inline void ath11k_core_create_firmware_path(struct ath11k_base *ab,

of_property_read_string(ab->dev->of_node, "firmware-name", &fw_name);

+ if (!fw_name)
+ fw_name = ath11k_core_get_usecase_firmware(ab);
+
if (fw_name && strncmp(filename, "board", 5))
snprintf(buf, buf_len, "%s/%s/%s/%s", ATH11K_FW_DIR,
ab->hw_params.fw.dir, fw_name, filename);
diff --git a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c
index 48b010a1b756..4f749d473d0e 100644
--- a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c
+++ b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
/*
* Copyright (c) 2018-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
*/

#include <linux/vmalloc.h>
@@ -29,8 +29,10 @@ print_array_to_buf_index(u8 *buf, u32 offset, const char *header, u32 stats_inde
" %u:%u,", stats_index++, le32_to_cpu(array[i]));
}
/* To overwrite the last trailing comma */
- index--;
- *(buf + offset + index) = '\0';
+ if (array_len > 0) {
+ index--;
+ *(buf + offset + index) = '\0';
+ }

if (footer) {
index += scnprintf(buf + offset + index,
diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c
index 256ffae4d7f7..b97469dca046 100644
--- a/drivers/net/wireless/ath/ath12k/mac.c
+++ b/drivers/net/wireless/ath/ath12k/mac.c
@@ -4004,8 +4004,10 @@ ath12k_mac_op_change_vif_links(struct ieee80211_hw *hw,
if (WARN_ON(!arvif))
return -EINVAL;

- if (!arvif->is_created)
+ if (!arvif->is_created) {
+ ath12k_mac_unassign_link_vif(arvif);
continue;
+ }

if (WARN_ON(!arvif->ar))
return -EINVAL;
diff --git a/drivers/net/wireless/ath/ath12k/wow.c b/drivers/net/wireless/ath/ath12k/wow.c
index e8481626f194..c78aa95d4979 100644
--- a/drivers/net/wireless/ath/ath12k/wow.c
+++ b/drivers/net/wireless/ath/ath12k/wow.c
@@ -135,6 +135,9 @@ static int ath12k_wow_cleanup(struct ath12k *ar)
lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy);

list_for_each_entry(arvif, &ar->arvifs, list) {
+ if (arvif != &arvif->ahvif->deflink)
+ continue;
+
ret = ath12k_wow_vif_cleanup(arvif);
if (ret) {
ath12k_warn(ar->ab, "failed to clean wow wakeups on vdev %i: %d\n",
@@ -479,8 +482,12 @@ static int ath12k_wow_set_wakeups(struct ath12k *ar,
lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy);

list_for_each_entry(arvif, &ar->arvifs, list) {
+ if (arvif != &arvif->ahvif->deflink)
+ continue;
+
if (ath12k_wow_is_p2p_vdev(arvif->ahvif))
continue;
+
ret = ath12k_wow_vif_set_wakeups(arvif, wowlan);
if (ret) {
ath12k_warn(ar->ab, "failed to set wow wakeups on vdev %i: %d\n",
@@ -538,6 +545,9 @@ static int ath12k_wow_nlo_cleanup(struct ath12k *ar)
lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy);

list_for_each_entry(arvif, &ar->arvifs, list) {
+ if (arvif != &arvif->ahvif->deflink)
+ continue;
+
if (ath12k_wow_is_p2p_vdev(arvif->ahvif))
continue;

@@ -745,6 +755,9 @@ static int ath12k_wow_arp_ns_offload(struct ath12k *ar, bool enable)
list_for_each_entry(arvif, &ar->arvifs, list) {
ahvif = arvif->ahvif;

+ if (arvif != &ahvif->deflink)
+ continue;
+
if (ahvif->vdev_type != WMI_VDEV_TYPE_STA)
continue;

@@ -776,6 +789,9 @@ static int ath12k_gtk_rekey_offload(struct ath12k *ar, bool enable)
lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy);

list_for_each_entry(arvif, &ar->arvifs, list) {
+ if (arvif != &arvif->ahvif->deflink)
+ continue;
+
if (arvif->ahvif->vdev_type != WMI_VDEV_TYPE_STA ||
!arvif->is_up ||
!arvif->rekey_data.enable_offload)
diff --git a/drivers/net/wireless/ath/ath9k/Kconfig b/drivers/net/wireless/ath/ath9k/Kconfig
index 0c47be06c153..47d570a5ca6a 100644
--- a/drivers/net/wireless/ath/ath9k/Kconfig
+++ b/drivers/net/wireless/ath/ath9k/Kconfig
@@ -47,7 +47,7 @@ config ATH9K_PCI

config ATH9K_AHB
bool "Atheros ath9k AHB bus support"
- depends on ATH9K
+ depends on ATH9K && OF
default n
help
This option enables the AHB bus support in ath9k.
diff --git a/drivers/net/wireless/realtek/rtw89/debug.c b/drivers/net/wireless/realtek/rtw89/debug.c
index 3dc7981c510f..a82df3814069 100644
--- a/drivers/net/wireless/realtek/rtw89/debug.c
+++ b/drivers/net/wireless/realtek/rtw89/debug.c
@@ -824,10 +824,6 @@ static ssize_t __print_txpwr_map(struct rtw89_dev *rtwdev, char *buf, size_t buf
s8 *bufp, tmp;
int ret;

- bufp = vzalloc(map->addr_to - map->addr_from + 4);
- if (!bufp)
- return -ENOMEM;
-
if (path_num == 1)
max_valid_addr = map->addr_to_1ss;
else
@@ -836,6 +832,10 @@ static ssize_t __print_txpwr_map(struct rtw89_dev *rtwdev, char *buf, size_t buf
if (max_valid_addr == 0)
return -EOPNOTSUPP;

+ bufp = vzalloc(map->addr_to - map->addr_from + 4);
+ if (!bufp)
+ return -ENOMEM;
+
for (addr = map->addr_from; addr <= max_valid_addr; addr += 4) {
ret = rtw89_mac_txpwr_read32(rtwdev, RTW89_PHY_0, addr, &val);
if (ret)
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index a78a25b87240..61b547aab286 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -735,10 +735,11 @@ static void connect(struct backend_info *be)
*/
requested_num_queues = xenbus_read_unsigned(dev->otherend,
"multi-queue-num-queues", 1);
- if (requested_num_queues > xenvif_max_queues) {
+ if (requested_num_queues > xenvif_max_queues ||
+ requested_num_queues == 0) {
/* buggy or malicious guest */
xenbus_dev_fatal(dev, -EINVAL,
- "guest requested %u queues, exceeding the maximum of %u.",
+ "guest requested %u queues, but valid range is 1 - %u.",
requested_num_queues, xenvif_max_queues);
return;
}
diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
index c3f07be4aa22..af82385be7c6 100644
--- a/drivers/nvdimm/nd_virtio.c
+++ b/drivers/nvdimm/nd_virtio.c
@@ -44,6 +44,8 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
unsigned long flags;
int err, err1;

+ guard(mutex)(&vpmem->flush_lock);
+
/*
* Don't bother to submit the request to the device if the device is
* not activated.
@@ -53,7 +55,6 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
return -EIO;
}

- might_sleep();
req_data = kmalloc(sizeof(*req_data), GFP_KERNEL);
if (!req_data)
return -ENOMEM;
diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
index 2396d19ce549..77b196661905 100644
--- a/drivers/nvdimm/virtio_pmem.c
+++ b/drivers/nvdimm/virtio_pmem.c
@@ -64,6 +64,7 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
goto out_err;
}

+ mutex_init(&vpmem->flush_lock);
vpmem->vdev = vdev;
vdev->priv = vpmem;
err = init_vq(vpmem);
diff --git a/drivers/nvdimm/virtio_pmem.h b/drivers/nvdimm/virtio_pmem.h
index 0dddefe594c4..f72cf17f9518 100644
--- a/drivers/nvdimm/virtio_pmem.h
+++ b/drivers/nvdimm/virtio_pmem.h
@@ -13,6 +13,7 @@
#include <linux/module.h>
#include <uapi/linux/virtio_pmem.h>
#include <linux/libnvdimm.h>
+#include <linux/mutex.h>
#include <linux/spinlock.h>

struct virtio_pmem_request {
@@ -35,6 +36,9 @@ struct virtio_pmem {
/* Virtio pmem request queue */
struct virtqueue *req_vq;

+ /* Serialize flush requests to the device. */
+ struct mutex flush_lock;
+
/* nvdimm bus registers virtio pmem device */
struct nvdimm_bus *nvdimm_bus;
struct nvdimm_bus_descriptor nd_desc;
diff --git a/drivers/nvmem/Kconfig b/drivers/nvmem/Kconfig
index e0d88d3199c1..11b098705ec6 100644
--- a/drivers/nvmem/Kconfig
+++ b/drivers/nvmem/Kconfig
@@ -30,7 +30,7 @@ source "drivers/nvmem/layouts/Kconfig"

config NVMEM_AN8855_EFUSE
tristate "Airoha AN8855 eFuse support"
- depends on MFD_AIROHA_AN8855 || COMPILE_TEST
+ depends on COMPILE_TEST
help
Say y here to enable support for reading eFuses on Airoha AN8855
Switch. These are e.g. used to store factory programmed
diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index 3b773aaf9d05..9c184e93f50c 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -804,11 +804,13 @@ static void __init of_unittest_property_copy(void)

new = __of_prop_dup(&p1, GFP_KERNEL);
unittest(new && propcmp(&p1, new), "empty property didn't copy correctly\n");
- __of_prop_free(new);
+ if (new)
+ __of_prop_free(new);

new = __of_prop_dup(&p2, GFP_KERNEL);
unittest(new && propcmp(&p2, new), "non-empty property didn't copy correctly\n");
- __of_prop_free(new);
+ if (new)
+ __of_prop_free(new);
#endif
}

diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index bba4f7daff8c..775d4a36f2f5 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -241,7 +241,7 @@ unsigned int dev_pm_opp_get_level(struct dev_pm_opp *opp)
{
if (IS_ERR_OR_NULL(opp) || !opp->available) {
pr_err("%s: Invalid parameters\n", __func__);
- return 0;
+ return U32_MAX;
}

return opp->level;
diff --git a/drivers/pci/controller/dwc/pcie-sophgo.c b/drivers/pci/controller/dwc/pcie-sophgo.c
index ad4baaa34ffa..044088898819 100644
--- a/drivers/pci/controller/dwc/pcie-sophgo.c
+++ b/drivers/pci/controller/dwc/pcie-sophgo.c
@@ -161,6 +161,22 @@ static void sophgo_pcie_msi_enable(struct dw_pcie_rp *pp)
raw_spin_unlock_irqrestore(&pp->lock, flags);
}

+static void sophgo_pcie_disable_l0s_l1(struct dw_pcie_rp *pp)
+{
+ struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+ u32 offset, val;
+
+ offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+
+ dw_pcie_dbi_ro_wr_en(pci);
+
+ val = dw_pcie_readl_dbi(pci, PCI_EXP_LNKCAP + offset);
+ val &= ~(PCI_EXP_LNKCAP_ASPM_L0S | PCI_EXP_LNKCAP_ASPM_L1);
+ dw_pcie_writel_dbi(pci, PCI_EXP_LNKCAP + offset, val);
+
+ dw_pcie_dbi_ro_wr_dis(pci);
+}
+
static int sophgo_pcie_host_init(struct dw_pcie_rp *pp)
{
int irq;
@@ -171,6 +187,8 @@ static int sophgo_pcie_host_init(struct dw_pcie_rp *pp)

irq_set_chained_handler_and_data(irq, sophgo_pcie_intx_handler, pp);

+ sophgo_pcie_disable_l0s_l1(pp);
+
sophgo_pcie_msi_enable(pp);

return 0;
diff --git a/drivers/pci/controller/pcie-mediatek.c b/drivers/pci/controller/pcie-mediatek.c
index 24cc30a2ab6c..e0bf667c2b4c 100644
--- a/drivers/pci/controller/pcie-mediatek.c
+++ b/drivers/pci/controller/pcie-mediatek.c
@@ -575,8 +575,10 @@ static int mtk_pcie_init_irq_domain(struct mtk_pcie_port *port,

if (IS_ENABLED(CONFIG_PCI_MSI)) {
ret = mtk_pcie_allocate_msi_domains(port);
- if (ret)
+ if (ret) {
+ irq_domain_remove(port->irq_domain);
return ret;
+ }
}

return 0;
diff --git a/drivers/pci/controller/pcie-xilinx.c b/drivers/pci/controller/pcie-xilinx.c
index 937ea6ae1ac4..4aa139abac16 100644
--- a/drivers/pci/controller/pcie-xilinx.c
+++ b/drivers/pci/controller/pcie-xilinx.c
@@ -302,9 +302,10 @@ static int xilinx_allocate_msi_domains(struct xilinx_pcie *pcie)
return 0;
}

-static void xilinx_free_msi_domains(struct xilinx_pcie *pcie)
+static void xilinx_free_irq_domains(struct xilinx_pcie *pcie)
{
irq_domain_remove(pcie->msi_domain);
+ irq_domain_remove(pcie->leg_domain);
}

/* INTx Functions */
@@ -480,8 +481,10 @@ static int xilinx_pcie_init_irq_domain(struct xilinx_pcie *pcie)
phys_addr_t pa = ALIGN_DOWN(virt_to_phys(pcie), SZ_4K);

ret = xilinx_allocate_msi_domains(pcie);
- if (ret)
+ if (ret) {
+ irq_domain_remove(pcie->leg_domain);
return ret;
+ }

pcie_write(pcie, upper_32_bits(pa), XILINX_PCIE_REG_MSIBASE1);
pcie_write(pcie, lower_32_bits(pa), XILINX_PCIE_REG_MSIBASE2);
@@ -600,7 +603,7 @@ static int xilinx_pcie_probe(struct platform_device *pdev)

err = pci_host_probe(bridge);
if (err)
- xilinx_free_msi_domains(pcie);
+ xilinx_free_irq_domains(pcie);

return err;
}
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index 78e108e47254..49e395eb013d 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -147,11 +147,19 @@ static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj,
* we have just allocated the page no one else should be
* using it.
*/
- VM_WARN_ON_ONCE_PAGE(!page_ref_count(page), page);
+ VM_WARN_ON_ONCE_PAGE(page_ref_count(page), page);
set_page_count(page, 1);
ret = vm_insert_page(vma, vaddr, page);
if (ret) {
gen_pool_free(p2pdma->pool, (uintptr_t)kaddr, len);
+
+ /*
+ * Reset the page count. We don't use put_page()
+ * because we don't want to trigger the
+ * p2pdma_folio_free() path.
+ */
+ set_page_count(page, 0);
+ percpu_ref_put(ref);
return ret;
}
percpu_ref_get(ref);
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 9369377725fa..0162acfb5789 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -271,21 +271,6 @@ static acpi_status decode_type1_hpx_record(union acpi_object *record,
return AE_OK;
}

-static bool pcie_root_rcb_set(struct pci_dev *dev)
-{
- struct pci_dev *rp = pcie_find_root_port(dev);
- u16 lnkctl;
-
- if (!rp)
- return false;
-
- pcie_capability_read_word(rp, PCI_EXP_LNKCTL, &lnkctl);
- if (lnkctl & PCI_EXP_LNKCTL_RCB)
- return true;
-
- return false;
-}
-
/* _HPX PCI Express Setting Record (Type 2) */
struct hpx_type2 {
u32 revision;
@@ -311,6 +296,7 @@ static void program_hpx_type2(struct pci_dev *dev, struct hpx_type2 *hpx)
{
int pos;
u32 reg32;
+ const struct pci_host_bridge *host;

if (!hpx)
return;
@@ -318,6 +304,15 @@ static void program_hpx_type2(struct pci_dev *dev, struct hpx_type2 *hpx)
if (!pci_is_pcie(dev))
return;

+ host = pci_find_host_bridge(dev->bus);
+
+ /*
+ * Only do the _HPX Type 2 programming if OS owns PCIe native
+ * hotplug but not AER.
+ */
+ if (!host->native_pcie_hotplug || host->native_aer)
+ return;
+
if (hpx->revision > 1) {
pci_warn(dev, "PCIe settings rev %d not supported\n",
hpx->revision);
@@ -325,33 +320,27 @@ static void program_hpx_type2(struct pci_dev *dev, struct hpx_type2 *hpx)
}

/*
- * Don't allow _HPX to change MPS or MRRS settings. We manage
- * those to make sure they're consistent with the rest of the
- * platform.
+ * We only allow _HPX to program DEVCTL bits related to AER, namely
+ * PCI_EXP_DEVCTL_CERE, PCI_EXP_DEVCTL_NFERE, PCI_EXP_DEVCTL_FERE,
+ * and PCI_EXP_DEVCTL_URRE.
+ *
+ * The rest of DEVCTL is managed by the OS to make sure it's
+ * consistent with the rest of the platform.
*/
- hpx->pci_exp_devctl_and |= PCI_EXP_DEVCTL_PAYLOAD |
- PCI_EXP_DEVCTL_READRQ;
- hpx->pci_exp_devctl_or &= ~(PCI_EXP_DEVCTL_PAYLOAD |
- PCI_EXP_DEVCTL_READRQ);
+ hpx->pci_exp_devctl_and |= ~PCI_EXP_AER_FLAGS;
+ hpx->pci_exp_devctl_or &= PCI_EXP_AER_FLAGS;

/* Initialize Device Control Register */
pcie_capability_clear_and_set_word(dev, PCI_EXP_DEVCTL,
~hpx->pci_exp_devctl_and, hpx->pci_exp_devctl_or);

- /* Initialize Link Control Register */
+ /* Log if _HPX attempts to modify Link Control Register */
if (pcie_cap_has_lnkctl(dev)) {
-
- /*
- * If the Root Port supports Read Completion Boundary of
- * 128, set RCB to 128. Otherwise, clear it.
- */
- hpx->pci_exp_lnkctl_and |= PCI_EXP_LNKCTL_RCB;
- hpx->pci_exp_lnkctl_or &= ~PCI_EXP_LNKCTL_RCB;
- if (pcie_root_rcb_set(dev))
- hpx->pci_exp_lnkctl_or |= PCI_EXP_LNKCTL_RCB;
-
- pcie_capability_clear_and_set_word(dev, PCI_EXP_LNKCTL,
- ~hpx->pci_exp_lnkctl_and, hpx->pci_exp_lnkctl_or);
+ if (hpx->pci_exp_lnkctl_and != 0xffff ||
+ hpx->pci_exp_lnkctl_or != 0)
+ pci_info(dev, "_HPX attempts Link Control setting (AND %#06x OR %#06x)\n",
+ hpx->pci_exp_lnkctl_and,
+ hpx->pci_exp_lnkctl_or);
}

/* Find Advanced Error Reporting Enhanced Capability */
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 2f0da5dbbba4..82e323b5aaa2 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1488,6 +1488,9 @@ static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state, bool
|| (state == PCI_D2 && !dev->d2_support))
return -EIO;

+ if (dev->current_state == state)
+ return 0;
+
pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
if (PCI_POSSIBLE_ERROR(pmcsr)) {
pci_err(dev, "Unable to change power state from %s to %s, device inaccessible\n",
@@ -6714,7 +6717,7 @@ static void of_pci_bus_release_domain_nr(struct device *parent, int domain_nr)
return;

/* Release domain from IDA where it was allocated. */
- if (of_get_pci_domain_nr(parent->of_node) == domain_nr)
+ if (parent && of_get_pci_domain_nr(parent->of_node) == domain_nr)
ida_free(&pci_domain_nr_static_ida, domain_nr);
else
ida_free(&pci_domain_nr_dynamic_ida, domain_nr);
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 36f8c0985430..565acfcd7cdb 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -88,6 +88,9 @@ struct pcie_tlp_log;
#define PCI_BUS_BRIDGE_MEM_WINDOW 1
#define PCI_BUS_BRIDGE_PREF_MEM_WINDOW 2

+#define PCI_EXP_AER_FLAGS (PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \
+ PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)
+
extern const unsigned char pcie_link_speed[];
extern bool pci_early_dump;

diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 0b5ed4722ac3..23bead9415fc 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -238,9 +238,6 @@ void pcie_ecrc_get_policy(char *str)
}
#endif /* CONFIG_PCIE_ECRC */

-#define PCI_EXP_AER_FLAGS (PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \
- PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)
-
int pcie_aer_is_native(struct pci_dev *dev)
{
struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
diff --git a/drivers/pci/pcie/portdrv.c b/drivers/pci/pcie/portdrv.c
index 38a41ccf79b9..a0991da48213 100644
--- a/drivers/pci/pcie/portdrv.c
+++ b/drivers/pci/pcie/portdrv.c
@@ -557,10 +557,10 @@ static int pcie_port_remove_service(struct device *dev)

pciedev = to_pcie_device(dev);
driver = to_service_driver(dev->driver);
- if (driver && driver->remove) {
+ if (driver && driver->remove)
driver->remove(pciedev);
- put_device(dev);
- }
+
+ put_device(dev);
return 0;
}

diff --git a/drivers/pci/pcie/ptm.c b/drivers/pci/pcie/ptm.c
index 65e4b008be00..41d370f082ee 100644
--- a/drivers/pci/pcie/ptm.c
+++ b/drivers/pci/pcie/ptm.c
@@ -519,8 +519,10 @@ struct pci_ptm_debugfs *pcie_ptm_create_debugfs(struct device *dev, void *pdata,
return NULL;

dirname = devm_kasprintf(dev, GFP_KERNEL, "pcie_ptm_%s", dev_name(dev));
- if (!dirname)
+ if (!dirname) {
+ kfree(ptm_debugfs);
return NULL;
+ }

ptm_debugfs->debugfs = debugfs_create_dir(dirname, NULL);
ptm_debugfs->pdata = pdata;
@@ -551,6 +553,7 @@ void pcie_ptm_destroy_debugfs(struct pci_ptm_debugfs *ptm_debugfs)

mutex_destroy(&ptm_debugfs->lock);
debugfs_remove_recursive(ptm_debugfs->debugfs);
+ kfree(ptm_debugfs);
}
EXPORT_SYMBOL_GPL(pcie_ptm_destroy_debugfs);
#endif
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 9cd032dff31e..7d4f0db5ac26 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2251,7 +2251,8 @@ int pci_configure_extended_tags(struct pci_dev *dev, void *ign)
u16 ctl;
int ret;

- if (!pci_is_pcie(dev))
+ /* PCI_EXP_DEVCTL_EXT_TAG is RsvdP in VFs */
+ if (!pci_is_pcie(dev) || dev->is_virtfn)
return 0;

ret = pcie_capability_read_dword(dev, PCI_EXP_DEVCAP, &cap);
@@ -2380,6 +2381,37 @@ static void pci_configure_serr(struct pci_dev *dev)
}
}

+static void pci_configure_rcb(struct pci_dev *dev)
+{
+ struct pci_dev *rp;
+ u16 rp_lnkctl;
+
+ /*
+ * Per PCIe r7.0, sec 7.5.3.7, RCB is only meaningful in Root Ports
+ * (where it is read-only), Endpoints, and Bridges. It may only be
+ * set for Endpoints and Bridges if it is set in the Root Port. For
+ * Endpoints, it is 'RsvdP' for Virtual Functions.
+ */
+ if (!pci_is_pcie(dev) ||
+ pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
+ pci_pcie_type(dev) == PCI_EXP_TYPE_UPSTREAM ||
+ pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM ||
+ pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC ||
+ dev->is_virtfn)
+ return;
+
+ /* Root Port often not visible to virtualized guests */
+ rp = pcie_find_root_port(dev);
+ if (!rp)
+ return;
+
+ pcie_capability_read_word(rp, PCI_EXP_LNKCTL, &rp_lnkctl);
+ pcie_capability_clear_and_set_word(dev, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_RCB,
+ (rp_lnkctl & PCI_EXP_LNKCTL_RCB) ?
+ PCI_EXP_LNKCTL_RCB : 0);
+}
+
static void pci_configure_device(struct pci_dev *dev)
{
pci_configure_mps(dev);
@@ -2389,6 +2421,7 @@ static void pci_configure_device(struct pci_dev *dev)
pci_configure_aspm_l1ss(dev);
pci_configure_eetlp_prefix(dev);
pci_configure_serr(dev);
+ pci_configure_rcb(dev);

pci_acpi_program_hp_params(dev);
}
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index b9c252aa6fe0..9e073321b2dd 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -5581,6 +5581,7 @@ static void quirk_no_ext_tags(struct pci_dev *pdev)
pci_walk_bus(bridge->bus, pci_configure_extended_tags, NULL);
}
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_3WARE, 0x1004, quirk_no_ext_tags);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_3WARE, 0x1005, quirk_no_ext_tags);
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0132, quirk_no_ext_tags);
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0140, quirk_no_ext_tags);
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0141, quirk_no_ext_tags);
@@ -6188,6 +6189,10 @@ DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_PERICOM, 0x2303,
pci_fixup_pericom_acs_store_forward);
DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_PERICOM, 0x2303,
pci_fixup_pericom_acs_store_forward);
+DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_PERICOM, 0xb404,
+ pci_fixup_pericom_acs_store_forward);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_PERICOM, 0xb404,
+ pci_fixup_pericom_acs_store_forward);

static void nvidia_ion_ahci_fixup(struct pci_dev *pdev)
{
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 5ba878f15db3..cc592ccff542 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -221,14 +221,21 @@ static struct resource *pbus_select_window_for_type(struct pci_bus *bus,

switch (iores_type) {
case IORESOURCE_IO:
- return pci_bus_resource_n(bus, PCI_BUS_BRIDGE_IO_WINDOW);
+ win = pci_bus_resource_n(bus, PCI_BUS_BRIDGE_IO_WINDOW);
+ if (win && (win->flags & IORESOURCE_IO))
+ return win;
+ return NULL;

case IORESOURCE_MEM:
mmio = pci_bus_resource_n(bus, PCI_BUS_BRIDGE_MEM_WINDOW);
mmio_pref = pci_bus_resource_n(bus, PCI_BUS_BRIDGE_PREF_MEM_WINDOW);

- if (!(type & IORESOURCE_PREFETCH) ||
- !(mmio_pref->flags & IORESOURCE_MEM))
+ if (mmio && !(mmio->flags & IORESOURCE_MEM))
+ mmio = NULL;
+ if (mmio_pref && !(mmio_pref->flags & IORESOURCE_MEM))
+ mmio_pref = NULL;
+
+ if (!(type & IORESOURCE_PREFETCH) || !mmio_pref)
return mmio;

if ((type & IORESOURCE_MEM_64) ||
@@ -1066,16 +1073,13 @@ static resource_size_t calculate_memsize(resource_size_t size,
resource_size_t min_size,
resource_size_t add_size,
resource_size_t children_add_size,
- resource_size_t old_size,
resource_size_t align)
{
if (size < min_size)
size = min_size;
- if (old_size == 1)
- old_size = 0;

size = max(size, add_size) + children_add_size;
- return ALIGN(max(size, old_size), align);
+ return ALIGN(size, align);
}

resource_size_t __weak pcibios_window_alignment(struct pci_bus *bus,
@@ -1223,66 +1227,43 @@ static inline resource_size_t calculate_mem_align(resource_size_t *aligns,
return min_align;
}

-/**
- * pbus_upstream_space_available - Check no upstream resource limits allocation
- * @bus: The bus
- * @res: The resource to help select the correct bridge window
- * @size: The size required from the bridge window
- * @align: Required alignment for the resource
- *
- * Check that @size can fit inside the upstream bridge resources that are
- * already assigned. Select the upstream bridge window based on the type of
- * @res.
- *
- * Return: %true if enough space is available on all assigned upstream
+/*
+ * Calculate bridge window head alignment that leaves no gaps in between
* resources.
*/
-static bool pbus_upstream_space_available(struct pci_bus *bus,
- struct resource *res,
- resource_size_t size,
- resource_size_t align)
-{
- struct resource_constraint constraint = {
- .max = RESOURCE_SIZE_MAX,
- .align = align,
- };
- struct pci_bus *downstream = bus;
-
- while ((bus = bus->parent)) {
- if (pci_is_root_bus(bus))
- break;
+static resource_size_t calculate_head_align(resource_size_t *aligns,
+ int max_order)
+{
+ resource_size_t head_align = 1;
+ resource_size_t remainder = 0;
+ int order;

- res = pbus_select_window(bus, res);
- if (!res)
- return false;
- if (!res->parent)
- continue;
+ /* Take the largest alignment as the starting point. */
+ head_align <<= max_order + __ffs(SZ_1M);

- if (resource_size(res) >= size) {
- struct resource gap = {};
+ for (order = max_order - 1; order >= 0; order--) {
+ resource_size_t align1 = 1;

- if (find_resource_space(res, &gap, size, &constraint) == 0) {
- gap.flags = res->flags;
- pci_dbg(bus->self,
- "Assigned bridge window %pR to %pR free space at %pR\n",
- res, &bus->busn_res, &gap);
- return true;
- }
- }
+ align1 <<= order + __ffs(SZ_1M);

- if (bus->self) {
- pci_info(bus->self,
- "Assigned bridge window %pR to %pR cannot fit 0x%llx required for %s bridging to %pR\n",
- res, &bus->busn_res,
- (unsigned long long)size,
- pci_name(downstream->self),
- &downstream->busn_res);
- }
+ /*
+ * Account smaller resources with alignment < max_order that
+ * could be used to fill head room if alignment less than
+ * max_order is used.
+ */
+ remainder += aligns[order];

- return false;
+ /*
+ * Test if head fill is enough to satisfy the alignment of
+ * the larger resources after reducing the alignment.
+ */
+ while ((head_align > align1) && (remainder >= head_align / 2)) {
+ head_align /= 2;
+ remainder -= head_align;
+ }
}

- return true;
+ return head_align;
}

/**
@@ -1310,14 +1291,12 @@ static void pbus_size_mem(struct pci_bus *bus, unsigned long type,
{
struct pci_dev *dev;
resource_size_t min_align, win_align, align, size, size0, size1 = 0;
- resource_size_t aligns[28]; /* Alignments from 1MB to 128TB */
+ resource_size_t aligns[28] = {}; /* Alignments from 1MB to 128TB */
int order, max_order;
struct resource *b_res = pbus_select_window_for_type(bus, type);
resource_size_t children_add_size = 0;
resource_size_t children_add_align = 0;
resource_size_t add_align = 0;
- resource_size_t relaxed_align;
- resource_size_t old_size;

if (!b_res)
return;
@@ -1326,7 +1305,6 @@ static void pbus_size_mem(struct pci_bus *bus, unsigned long type,
if (b_res->parent)
return;

- memset(aligns, 0, sizeof(aligns));
max_order = 0;
size = 0;

@@ -1371,12 +1349,8 @@ static void pbus_size_mem(struct pci_bus *bus, unsigned long type,
continue;
}
size += max(r_size, align);
- /*
- * Exclude ranges with size > align from calculation of
- * the alignment.
- */
- if (r_size <= align)
- aligns[order] += align;
+
+ aligns[order] += align;
if (order > max_order)
max_order = order;

@@ -1388,44 +1362,20 @@ static void pbus_size_mem(struct pci_bus *bus, unsigned long type,
}
}

- old_size = resource_size(b_res);
win_align = window_alignment(bus, b_res->flags);
- min_align = calculate_mem_align(aligns, max_order);
+ min_align = calculate_head_align(aligns, max_order);
min_align = max(min_align, win_align);
- size0 = calculate_memsize(size, min_size, 0, 0, old_size, min_align);
+ size0 = calculate_memsize(size, min_size, 0, 0, win_align);

if (size0) {
resource_set_range(b_res, min_align, size0);
b_res->flags &= ~IORESOURCE_DISABLED;
}

- if (bus->self && size0 &&
- !pbus_upstream_space_available(bus, b_res, size0, min_align)) {
- relaxed_align = 1ULL << (max_order + __ffs(SZ_1M));
- relaxed_align = max(relaxed_align, win_align);
- min_align = min(min_align, relaxed_align);
- size0 = calculate_memsize(size, min_size, 0, 0, old_size, win_align);
- resource_set_range(b_res, min_align, size0);
- pci_info(bus->self, "bridge window %pR to %pR requires relaxed alignment rules\n",
- b_res, &bus->busn_res);
- }
-
if (realloc_head && (add_size > 0 || children_add_size > 0)) {
add_align = max(min_align, add_align);
size1 = calculate_memsize(size, min_size, add_size, children_add_size,
- old_size, add_align);
-
- if (bus->self && size1 &&
- !pbus_upstream_space_available(bus, b_res, size1, add_align)) {
- relaxed_align = 1ULL << (max_order + __ffs(SZ_1M));
- relaxed_align = max(relaxed_align, win_align);
- min_align = min(min_align, relaxed_align);
- size1 = calculate_memsize(size, min_size, add_size, children_add_size,
- old_size, win_align);
- pci_info(bus->self,
- "bridge window %pR to %pR requires relaxed alignment rules\n",
- b_res, &bus->busn_res);
- }
+ win_align);
}

if (!size0 && !size1) {
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index fa50645fedda..e4e4e63c64c4 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -105,6 +105,8 @@ struct arm_spe_pmu {
/* Keep track of our dynamic hotplug state */
static enum cpuhp_state arm_spe_pmu_online;

+static void arm_spe_pmu_stop(struct perf_event *event, int flags);
+
enum arm_spe_pmu_buf_fault_action {
SPE_PMU_BUF_FAULT_ACT_SPURIOUS,
SPE_PMU_BUF_FAULT_ACT_FATAL,
@@ -582,8 +584,8 @@ static u64 arm_spe_pmu_next_off(struct perf_output_handle *handle)
return limit;
}

-static void arm_spe_perf_aux_output_begin(struct perf_output_handle *handle,
- struct perf_event *event)
+static int arm_spe_perf_aux_output_begin(struct perf_output_handle *handle,
+ struct perf_event *event)
{
u64 base, limit;
struct arm_spe_pmu_buf *buf;
@@ -597,7 +599,6 @@ static void arm_spe_perf_aux_output_begin(struct perf_output_handle *handle,
/* Start a new aux session */
buf = perf_aux_output_begin(handle, event);
if (!buf) {
- event->hw.state |= PERF_HES_STOPPED;
/*
* We still need to clear the limit pointer, since the
* profiler might only be disabled by virtue of a fault.
@@ -617,6 +618,7 @@ static void arm_spe_perf_aux_output_begin(struct perf_output_handle *handle,

out_write_limit:
write_sysreg_s(limit, SYS_PMBLIMITR_EL1);
+ return (limit & PMBLIMITR_EL1_E) ? 0 : -EIO;
}

static void arm_spe_perf_aux_output_end(struct perf_output_handle *handle)
@@ -756,7 +758,10 @@ static irqreturn_t arm_spe_pmu_irq_handler(int irq, void *dev)
* when we get to it.
*/
if (!(handle->aux_flags & PERF_AUX_FLAG_TRUNCATED)) {
- arm_spe_perf_aux_output_begin(handle, event);
+ if (arm_spe_perf_aux_output_begin(handle, event)) {
+ arm_spe_pmu_stop(event, PERF_EF_UPDATE);
+ break;
+ }
isb();
}
break;
@@ -851,9 +856,10 @@ static void arm_spe_pmu_start(struct perf_event *event, int flags)
struct perf_output_handle *handle = this_cpu_ptr(spe_pmu->handle);

hwc->state = 0;
- arm_spe_perf_aux_output_begin(handle, event);
- if (hwc->state)
+ if (arm_spe_perf_aux_output_begin(handle, event)) {
+ arm_spe_pmu_stop(event, 0);
return;
+ }

reg = arm_spe_event_to_pmsfcr(event);
write_sysreg_s(reg, SYS_PMSFCR_EL1);
diff --git a/drivers/phy/freescale/phy-fsl-imx8qm-hsio.c b/drivers/phy/freescale/phy-fsl-imx8qm-hsio.c
index 977d21d753a5..279b8ac7822d 100644
--- a/drivers/phy/freescale/phy-fsl-imx8qm-hsio.c
+++ b/drivers/phy/freescale/phy-fsl-imx8qm-hsio.c
@@ -251,7 +251,7 @@ static void imx_hsio_configure_clk_pad(struct phy *phy)
struct imx_hsio_lane *lane = phy_get_drvdata(phy);
struct imx_hsio_priv *priv = lane->priv;

- if (strncmp(priv->refclk_pad, "output", 6) == 0) {
+ if (priv->refclk_pad && strncmp(priv->refclk_pad, "output", 6) == 0) {
pll = true;
regmap_update_bits(priv->misc, HSIO_CTRL0,
HSIO_IOB_A_0_TXOE | HSIO_IOB_A_0_M1M0_MASK,
diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c
index 29de2f7bdae8..cafa618d70fd 100644
--- a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c
+++ b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c
@@ -414,6 +414,8 @@ struct rk_hdptx_phy {
static const struct ropll_config ropll_tmds_cfg[] = {
{ 594000000ULL, 124, 124, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0,
1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
+ { 461101250ULL, 97, 97, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 71, 1, 53, 2, 6,
+ 35, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
{ 371250000ULL, 155, 155, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0,
1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
{ 297000000ULL, 124, 124, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0,
diff --git a/drivers/pinctrl/meson/pinctrl-amlogic-a4.c b/drivers/pinctrl/meson/pinctrl-amlogic-a4.c
index d9e3a8d5932a..f05d8261624a 100644
--- a/drivers/pinctrl/meson/pinctrl-amlogic-a4.c
+++ b/drivers/pinctrl/meson/pinctrl-amlogic-a4.c
@@ -725,8 +725,9 @@ static u32 aml_bank_pins(struct device_node *np)
if (of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3,
0, &of_args))
return 0;
- else
- return of_args.args[2];
+
+ of_node_put(of_args.np);
+ return of_args.args[2];
}

static int aml_bank_number(struct device_node *np)
@@ -736,8 +737,9 @@ static int aml_bank_number(struct device_node *np)
if (of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3,
0, &of_args))
return -EINVAL;
- else
- return of_args.args[1] >> 8;
+
+ of_node_put(of_args.np);
+ return of_args.args[1] >> 8;
}

static unsigned int aml_count_pins(struct device_node *np)
diff --git a/drivers/pinctrl/pinctrl-equilibrium.c b/drivers/pinctrl/pinctrl-equilibrium.c
index 2d04829b29c9..48b55c5bf8d4 100644
--- a/drivers/pinctrl/pinctrl-equilibrium.c
+++ b/drivers/pinctrl/pinctrl-equilibrium.c
@@ -846,6 +846,7 @@ static int pinbank_init(struct device_node *np,

bank->pin_base = spec.args[1];
bank->nr_pins = spec.args[2];
+ of_node_put(spec.np);

bank->aval_pinmap = readl(bank->membase + REG_AVAIL);
bank->id = id;
diff --git a/drivers/pinctrl/pinctrl-k230.c b/drivers/pinctrl/pinctrl-k230.c
index d716f23d837f..20f7c0f70eb7 100644
--- a/drivers/pinctrl/pinctrl-k230.c
+++ b/drivers/pinctrl/pinctrl-k230.c
@@ -65,6 +65,7 @@ struct k230_pmx_func {
};

struct k230_pinctrl {
+ struct device *dev;
struct pinctrl_desc pctl;
struct pinctrl_dev *pctl_dev;
struct regmap *regmap_base;
@@ -470,7 +471,7 @@ static int k230_pinctrl_parse_groups(struct device_node *np,
struct k230_pinctrl *info,
unsigned int index)
{
- struct device *dev = info->pctl_dev->dev;
+ struct device *dev = info->dev;
const __be32 *list;
int size, i, ret;

@@ -511,7 +512,7 @@ static int k230_pinctrl_parse_functions(struct device_node *np,
struct k230_pinctrl *info,
unsigned int index)
{
- struct device *dev = info->pctl_dev->dev;
+ struct device *dev = info->dev;
struct k230_pmx_func *func;
struct k230_pin_group *grp;
static unsigned int idx, i;
@@ -596,6 +597,8 @@ static int k230_pinctrl_probe(struct platform_device *pdev)
if (!info)
return -ENOMEM;

+ info->dev = dev;
+
pctl = &info->pctl;

pctl->name = "k230-pinctrl";
diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c
index 998f23d6c317..d85e6c1f6321 100644
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -1359,6 +1359,7 @@ static int pcs_add_gpio_func(struct device_node *node, struct pcs_device *pcs)
}
range = devm_kzalloc(pcs->dev, sizeof(*range), GFP_KERNEL);
if (!range) {
+ of_node_put(gpiospec.np);
ret = -ENOMEM;
break;
}
@@ -1368,6 +1369,7 @@ static int pcs_add_gpio_func(struct device_node *node, struct pcs_device *pcs)
mutex_lock(&pcs->mutex);
list_add_tail(&range->node, &pcs->gpiofuncs);
mutex_unlock(&pcs->mutex);
+ of_node_put(gpiospec.np);
}
return ret;
}
diff --git a/drivers/pinctrl/qcom/pinctrl-sm8250-lpass-lpi.c b/drivers/pinctrl/qcom/pinctrl-sm8250-lpass-lpi.c
index 64494a86490e..c27452eece3e 100644
--- a/drivers/pinctrl/qcom/pinctrl-sm8250-lpass-lpi.c
+++ b/drivers/pinctrl/qcom/pinctrl-sm8250-lpass-lpi.c
@@ -73,7 +73,7 @@ static const char * const i2s1_ws_groups[] = { "gpio7" };
static const char * const i2s1_data_groups[] = { "gpio8", "gpio9" };
static const char * const wsa_swr_clk_groups[] = { "gpio10" };
static const char * const wsa_swr_data_groups[] = { "gpio11" };
-static const char * const i2s2_data_groups[] = { "gpio12", "gpio12" };
+static const char * const i2s2_data_groups[] = { "gpio12", "gpio13" };

static const struct lpi_pingroup sm8250_groups[] = {
LPI_PINGROUP(0, 0, swr_tx_clk, qua_mi2s_sclk, _, _),
diff --git a/drivers/platform/chrome/cros_ec_lightbar.c b/drivers/platform/chrome/cros_ec_lightbar.c
index 87634f6921b7..6b028615ee24 100644
--- a/drivers/platform/chrome/cros_ec_lightbar.c
+++ b/drivers/platform/chrome/cros_ec_lightbar.c
@@ -119,7 +119,7 @@ static int get_lightbar_version(struct cros_ec_dev *ec,
param = (struct ec_params_lightbar *)msg->data;
param->cmd = LIGHTBAR_CMD_VERSION;
msg->outsize = sizeof(param->cmd);
- msg->result = sizeof(resp->version);
+ msg->insize = sizeof(resp->version);
ret = cros_ec_cmd_xfer_status(ec->ec_dev, msg);
if (ret < 0 && ret != -EINVAL) {
ret = 0;
diff --git a/drivers/platform/chrome/cros_typec_switch.c b/drivers/platform/chrome/cros_typec_switch.c
index 8d7c34abb0a1..d8a28d4e51a8 100644
--- a/drivers/platform/chrome/cros_typec_switch.c
+++ b/drivers/platform/chrome/cros_typec_switch.c
@@ -230,20 +230,20 @@ static int cros_typec_register_switches(struct cros_typec_switch_data *sdata)

adev = to_acpi_device_node(fwnode);
if (!adev) {
- dev_err(fwnode->dev, "Couldn't get ACPI device handle\n");
+ dev_err(dev, "Couldn't get ACPI device handle for %pfwP\n", fwnode);
ret = -ENODEV;
goto err_switch;
}

ret = acpi_evaluate_integer(adev->handle, "_ADR", NULL, &index);
if (ACPI_FAILURE(ret)) {
- dev_err(fwnode->dev, "_ADR wasn't evaluated\n");
+ dev_err(dev, "_ADR wasn't evaluated for %pfwP\n", fwnode);
ret = -ENODATA;
goto err_switch;
}

if (index >= EC_USB_PD_MAX_PORTS) {
- dev_err(fwnode->dev, "Invalid port index number: %llu\n", index);
+ dev_err(dev, "%pfwP: Invalid port index number: %llu\n", fwnode, index);
ret = -EINVAL;
goto err_switch;
}
diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c
index bc544a4a5266..e787480f4df2 100644
--- a/drivers/platform/x86/amd/pmf/core.c
+++ b/drivers/platform/x86/amd/pmf/core.c
@@ -314,6 +314,61 @@ int amd_pmf_init_metrics_table(struct amd_pmf_dev *dev)
return 0;
}

+static int amd_pmf_reinit_ta(struct amd_pmf_dev *pdev)
+{
+ bool status;
+ int ret, i;
+
+ for (i = 0; i < ARRAY_SIZE(amd_pmf_ta_uuid); i++) {
+ ret = amd_pmf_tee_init(pdev, &amd_pmf_ta_uuid[i]);
+ if (ret) {
+ dev_err(pdev->dev, "TEE init failed for UUID[%d] ret: %d\n", i, ret);
+ return ret;
+ }
+
+ ret = amd_pmf_start_policy_engine(pdev);
+ dev_dbg(pdev->dev, "start policy engine ret: %d (UUID idx: %d)\n", ret, i);
+ status = ret == TA_PMF_TYPE_SUCCESS;
+ if (status)
+ break;
+ amd_pmf_tee_deinit(pdev);
+ }
+
+ return 0;
+}
+
+static int amd_pmf_restore_handler(struct device *dev)
+{
+ struct amd_pmf_dev *pdev = dev_get_drvdata(dev);
+ int ret;
+
+ if (pdev->buf) {
+ ret = amd_pmf_set_dram_addr(pdev, false);
+ if (ret)
+ return ret;
+ }
+
+ if (pdev->smart_pc_enabled)
+ amd_pmf_reinit_ta(pdev);
+
+ return 0;
+}
+
+static int amd_pmf_freeze_handler(struct device *dev)
+{
+ struct amd_pmf_dev *pdev = dev_get_drvdata(dev);
+
+ if (!pdev->smart_pc_enabled)
+ return 0;
+
+ cancel_delayed_work_sync(&pdev->pb_work);
+ /* Clear all TEE resources */
+ amd_pmf_tee_deinit(pdev);
+ pdev->session_id = 0;
+
+ return 0;
+}
+
static int amd_pmf_suspend_handler(struct device *dev)
{
struct amd_pmf_dev *pdev = dev_get_drvdata(dev);
@@ -347,7 +402,12 @@ static int amd_pmf_resume_handler(struct device *dev)
return 0;
}

-static DEFINE_SIMPLE_DEV_PM_OPS(amd_pmf_pm, amd_pmf_suspend_handler, amd_pmf_resume_handler);
+static const struct dev_pm_ops amd_pmf_pm = {
+ .suspend = amd_pmf_suspend_handler,
+ .resume = amd_pmf_resume_handler,
+ .freeze = amd_pmf_freeze_handler,
+ .restore = amd_pmf_restore_handler,
+};

static void amd_pmf_init_features(struct amd_pmf_dev *dev)
{
diff --git a/drivers/platform/x86/amd/pmf/pmf.h b/drivers/platform/x86/amd/pmf/pmf.h
index bd19f2a6bc78..2da1885d8791 100644
--- a/drivers/platform/x86/amd/pmf/pmf.h
+++ b/drivers/platform/x86/amd/pmf/pmf.h
@@ -122,6 +122,12 @@ struct cookie_header {

typedef void (*apmf_event_handler_t)(acpi_handle handle, u32 event, void *data);

+static const uuid_t amd_pmf_ta_uuid[] __used = { UUID_INIT(0xd9b39bf2, 0x66bd, 0x4154, 0xaf, 0xb8,
+ 0x8a, 0xcc, 0x2b, 0x2b, 0x60, 0xd6),
+ UUID_INIT(0x6fd93b77, 0x3fb8, 0x524d, 0xb1, 0x2d,
+ 0xc5, 0x29, 0xb1, 0x3d, 0x85, 0x43),
+ };
+
/* APTS PMF BIOS Interface */
struct amd_pmf_apts_output {
u16 table_version;
@@ -888,4 +894,8 @@ void amd_pmf_populate_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_tab
void amd_pmf_dump_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in);
int amd_pmf_invoke_cmd_enact(struct amd_pmf_dev *dev);

+int amd_pmf_tee_init(struct amd_pmf_dev *dev, const uuid_t *uuid);
+void amd_pmf_tee_deinit(struct amd_pmf_dev *dev);
+int amd_pmf_start_policy_engine(struct amd_pmf_dev *dev);
+
#endif /* PMF_H */
diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c
index 6e8116bef4f6..903045935237 100644
--- a/drivers/platform/x86/amd/pmf/tee-if.c
+++ b/drivers/platform/x86/amd/pmf/tee-if.c
@@ -27,12 +27,6 @@ module_param(pb_side_load, bool, 0444);
MODULE_PARM_DESC(pb_side_load, "Sideload policy binaries debug policy failures");
#endif

-static const uuid_t amd_pmf_ta_uuid[] = { UUID_INIT(0xd9b39bf2, 0x66bd, 0x4154, 0xaf, 0xb8, 0x8a,
- 0xcc, 0x2b, 0x2b, 0x60, 0xd6),
- UUID_INIT(0x6fd93b77, 0x3fb8, 0x524d, 0xb1, 0x2d, 0xc5,
- 0x29, 0xb1, 0x3d, 0x85, 0x43),
- };
-
static const char *amd_pmf_uevent_as_str(unsigned int state)
{
switch (state) {
@@ -312,7 +306,7 @@ static void amd_pmf_invoke_cmd(struct work_struct *work)
schedule_delayed_work(&dev->pb_work, msecs_to_jiffies(pb_actions_ms));
}

-static int amd_pmf_start_policy_engine(struct amd_pmf_dev *dev)
+int amd_pmf_start_policy_engine(struct amd_pmf_dev *dev)
{
struct cookie_header *header;
int res;
@@ -468,7 +462,7 @@ static int amd_pmf_register_input_device(struct amd_pmf_dev *dev)
return 0;
}

-static int amd_pmf_tee_init(struct amd_pmf_dev *dev, const uuid_t *uuid)
+int amd_pmf_tee_init(struct amd_pmf_dev *dev, const uuid_t *uuid)
{
u32 size;
int ret;
@@ -516,7 +510,7 @@ static int amd_pmf_tee_init(struct amd_pmf_dev *dev, const uuid_t *uuid)
return ret;
}

-static void amd_pmf_tee_deinit(struct amd_pmf_dev *dev)
+void amd_pmf_tee_deinit(struct amd_pmf_dev *dev)
{
if (!dev->tee_ctx)
return;
diff --git a/drivers/platform/x86/hp/hp-wmi.c b/drivers/platform/x86/hp/hp-wmi.c
index ad9d9f97960f..dfe45692c956 100644
--- a/drivers/platform/x86/hp/hp-wmi.c
+++ b/drivers/platform/x86/hp/hp-wmi.c
@@ -53,6 +53,66 @@ MODULE_ALIAS("wmi:5FB7F034-2C63-45E9-BE91-3D44E2C707E4");

#define zero_if_sup(tmp) (zero_insize_support?0:sizeof(tmp)) // use when zero insize is required

+enum hp_thermal_profile_omen_v0 {
+ HP_OMEN_V0_THERMAL_PROFILE_DEFAULT = 0x00,
+ HP_OMEN_V0_THERMAL_PROFILE_PERFORMANCE = 0x01,
+ HP_OMEN_V0_THERMAL_PROFILE_COOL = 0x02,
+};
+
+enum hp_thermal_profile_omen_v1 {
+ HP_OMEN_V1_THERMAL_PROFILE_DEFAULT = 0x30,
+ HP_OMEN_V1_THERMAL_PROFILE_PERFORMANCE = 0x31,
+ HP_OMEN_V1_THERMAL_PROFILE_COOL = 0x50,
+};
+
+enum hp_thermal_profile_omen_flags {
+ HP_OMEN_EC_FLAGS_TURBO = 0x04,
+ HP_OMEN_EC_FLAGS_NOTIMER = 0x02,
+ HP_OMEN_EC_FLAGS_JUSTSET = 0x01,
+};
+
+enum hp_thermal_profile_victus {
+ HP_VICTUS_THERMAL_PROFILE_DEFAULT = 0x00,
+ HP_VICTUS_THERMAL_PROFILE_PERFORMANCE = 0x01,
+ HP_VICTUS_THERMAL_PROFILE_QUIET = 0x03,
+};
+
+enum hp_thermal_profile_victus_s {
+ HP_VICTUS_S_THERMAL_PROFILE_DEFAULT = 0x00,
+ HP_VICTUS_S_THERMAL_PROFILE_PERFORMANCE = 0x01,
+};
+
+enum hp_thermal_profile {
+ HP_THERMAL_PROFILE_PERFORMANCE = 0x00,
+ HP_THERMAL_PROFILE_DEFAULT = 0x01,
+ HP_THERMAL_PROFILE_COOL = 0x02,
+ HP_THERMAL_PROFILE_QUIET = 0x03,
+};
+
+struct thermal_profile_params {
+ u8 performance;
+ u8 balanced;
+ u8 low_power;
+};
+
+static const struct thermal_profile_params victus_s_thermal_params = {
+ .performance = HP_VICTUS_S_THERMAL_PROFILE_PERFORMANCE,
+ .balanced = HP_VICTUS_S_THERMAL_PROFILE_DEFAULT,
+ .low_power = HP_VICTUS_S_THERMAL_PROFILE_DEFAULT,
+};
+
+static const struct thermal_profile_params omen_v1_thermal_params = {
+ .performance = HP_OMEN_V1_THERMAL_PROFILE_PERFORMANCE,
+ .balanced = HP_OMEN_V1_THERMAL_PROFILE_DEFAULT,
+ .low_power = HP_OMEN_V1_THERMAL_PROFILE_DEFAULT,
+};
+
+/*
+ * A generic pointer for the currently-active board's thermal profile
+ * parameters.
+ */
+static struct thermal_profile_params *active_thermal_profile_params;
+
/* DMI board names of devices that should use the omen specific path for
* thermal profiles.
* This was obtained by taking a look in the windows omen command center
@@ -93,12 +153,40 @@ static const char * const victus_thermal_profile_boards[] = {
};

/* DMI Board names of Victus 16-r and Victus 16-s laptops */
-static const char * const victus_s_thermal_profile_boards[] = {
- "8BBE", "8BD4", "8BD5",
- "8C78", "8C99", "8C9C",
- "8D41",
+static const struct dmi_system_id victus_s_thermal_profile_boards[] __initconst = {
+ {
+ .matches = { DMI_MATCH(DMI_BOARD_NAME, "8BBE") },
+ .driver_data = (void *)&victus_s_thermal_params,
+ },
+ {
+ .matches = { DMI_MATCH(DMI_BOARD_NAME, "8BD4") },
+ .driver_data = (void *)&victus_s_thermal_params,
+ },
+ {
+ .matches = { DMI_MATCH(DMI_BOARD_NAME, "8BD5") },
+ .driver_data = (void *)&victus_s_thermal_params,
+ },
+ {
+ .matches = { DMI_MATCH(DMI_BOARD_NAME, "8C78") },
+ .driver_data = (void *)&omen_v1_thermal_params,
+ },
+ {
+ .matches = { DMI_MATCH(DMI_BOARD_NAME, "8C99") },
+ .driver_data = (void *)&victus_s_thermal_params,
+ },
+ {
+ .matches = { DMI_MATCH(DMI_BOARD_NAME, "8C9C") },
+ .driver_data = (void *)&victus_s_thermal_params,
+ },
+ {
+ .matches = { DMI_MATCH(DMI_BOARD_NAME, "8D41") },
+ .driver_data = (void *)&victus_s_thermal_params,
+ },
+ {},
};

+static bool is_victus_s_board;
+
enum hp_wmi_radio {
HPWMI_WIFI = 0x0,
HPWMI_BLUETOOTH = 0x1,
@@ -219,42 +307,6 @@ enum hp_wireless2_bits {
HPWMI_POWER_FW_OR_HW = HPWMI_POWER_BIOS | HPWMI_POWER_HARD,
};

-enum hp_thermal_profile_omen_v0 {
- HP_OMEN_V0_THERMAL_PROFILE_DEFAULT = 0x00,
- HP_OMEN_V0_THERMAL_PROFILE_PERFORMANCE = 0x01,
- HP_OMEN_V0_THERMAL_PROFILE_COOL = 0x02,
-};
-
-enum hp_thermal_profile_omen_v1 {
- HP_OMEN_V1_THERMAL_PROFILE_DEFAULT = 0x30,
- HP_OMEN_V1_THERMAL_PROFILE_PERFORMANCE = 0x31,
- HP_OMEN_V1_THERMAL_PROFILE_COOL = 0x50,
-};
-
-enum hp_thermal_profile_omen_flags {
- HP_OMEN_EC_FLAGS_TURBO = 0x04,
- HP_OMEN_EC_FLAGS_NOTIMER = 0x02,
- HP_OMEN_EC_FLAGS_JUSTSET = 0x01,
-};
-
-enum hp_thermal_profile_victus {
- HP_VICTUS_THERMAL_PROFILE_DEFAULT = 0x00,
- HP_VICTUS_THERMAL_PROFILE_PERFORMANCE = 0x01,
- HP_VICTUS_THERMAL_PROFILE_QUIET = 0x03,
-};
-
-enum hp_thermal_profile_victus_s {
- HP_VICTUS_S_THERMAL_PROFILE_DEFAULT = 0x00,
- HP_VICTUS_S_THERMAL_PROFILE_PERFORMANCE = 0x01,
-};
-
-enum hp_thermal_profile {
- HP_THERMAL_PROFILE_PERFORMANCE = 0x00,
- HP_THERMAL_PROFILE_DEFAULT = 0x01,
- HP_THERMAL_PROFILE_COOL = 0x02,
- HP_THERMAL_PROFILE_QUIET = 0x03,
-};
-
#define IS_HWBLOCKED(x) ((x & HPWMI_POWER_FW_OR_HW) != HPWMI_POWER_FW_OR_HW)
#define IS_SWBLOCKED(x) !(x & HPWMI_POWER_SOFT)

@@ -1575,15 +1627,8 @@ static int platform_profile_victus_set_ec(enum platform_profile_option profile)

static bool is_victus_s_thermal_profile(void)
{
- const char *board_name;
-
- board_name = dmi_get_system_info(DMI_BOARD_NAME);
- if (!board_name)
- return false;
-
- return match_string(victus_s_thermal_profile_boards,
- ARRAY_SIZE(victus_s_thermal_profile_boards),
- board_name) >= 0;
+ /* Initialised in driver init, hence safe to use here */
+ return is_victus_s_board;
}

static int victus_s_gpu_thermal_profile_get(bool *ctgp_enable,
@@ -1666,25 +1711,30 @@ static int victus_s_set_cpu_pl1_pl2(u8 pl1, u8 pl2)

static int platform_profile_victus_s_set_ec(enum platform_profile_option profile)
{
+ struct thermal_profile_params *params;
bool gpu_ctgp_enable, gpu_ppab_enable;
u8 gpu_dstate; /* Test shows 1 = 100%, 2 = 50%, 3 = 25%, 4 = 12.5% */
int err, tp;

+ params = active_thermal_profile_params;
+ if (!params)
+ return -ENODEV;
+
switch (profile) {
case PLATFORM_PROFILE_PERFORMANCE:
- tp = HP_VICTUS_S_THERMAL_PROFILE_PERFORMANCE;
+ tp = params->performance;
gpu_ctgp_enable = true;
gpu_ppab_enable = true;
gpu_dstate = 1;
break;
case PLATFORM_PROFILE_BALANCED:
- tp = HP_VICTUS_S_THERMAL_PROFILE_DEFAULT;
+ tp = params->balanced;
gpu_ctgp_enable = false;
gpu_ppab_enable = true;
gpu_dstate = 1;
break;
case PLATFORM_PROFILE_LOW_POWER:
- tp = HP_VICTUS_S_THERMAL_PROFILE_DEFAULT;
+ tp = params->low_power;
gpu_ctgp_enable = false;
gpu_ppab_enable = false;
gpu_dstate = 1;
@@ -2221,6 +2271,26 @@ static int hp_wmi_hwmon_init(void)
return 0;
}

+static void __init setup_active_thermal_profile_params(void)
+{
+ const struct dmi_system_id *id;
+
+ /*
+ * Currently only victus_s devices use the
+ * active_thermal_profile_params
+ */
+ id = dmi_first_match(victus_s_thermal_profile_boards);
+ if (id) {
+ /*
+ * Marking this boolean is required to ensure that
+ * is_victus_s_thermal_profile() behaves like a valid
+ * wrapper.
+ */
+ is_victus_s_board = true;
+ active_thermal_profile_params = id->driver_data;
+ }
+}
+
static int __init hp_wmi_init(void)
{
int event_capable = wmi_has_guid(HPWMI_EVENT_GUID);
@@ -2248,6 +2318,11 @@ static int __init hp_wmi_init(void)
goto err_destroy_input;
}

+ /*
+ * Setup active board's thermal profile parameters before
+ * starting platform driver probe.
+ */
+ setup_active_thermal_profile_params();
err = platform_driver_probe(&hp_wmi_driver, hp_wmi_bios_setup);
if (err)
goto err_unregister_device;
diff --git a/drivers/platform/x86/intel/int0002_vgpio.c b/drivers/platform/x86/intel/int0002_vgpio.c
index 6f5629dc3f8d..562e88025643 100644
--- a/drivers/platform/x86/intel/int0002_vgpio.c
+++ b/drivers/platform/x86/intel/int0002_vgpio.c
@@ -206,8 +206,8 @@ static int int0002_probe(struct platform_device *pdev)
* FIXME: augment this if we managed to pull handling of shared
* IRQs into gpiolib.
*/
- ret = devm_request_irq(dev, irq, int0002_irq,
- IRQF_ONESHOT | IRQF_SHARED, "INT0002", chip);
+ ret = devm_request_irq(dev, irq, int0002_irq, IRQF_SHARED, "INT0002",
+ chip);
if (ret) {
dev_err(dev, "Error requesting IRQ %d: %d\n", irq, ret);
return ret;
diff --git a/drivers/power/reset/nvmem-reboot-mode.c b/drivers/power/reset/nvmem-reboot-mode.c
index 41530b70cfc4..d260715fccf6 100644
--- a/drivers/power/reset/nvmem-reboot-mode.c
+++ b/drivers/power/reset/nvmem-reboot-mode.c
@@ -10,6 +10,7 @@
#include <linux/nvmem-consumer.h>
#include <linux/platform_device.h>
#include <linux/reboot-mode.h>
+#include <linux/slab.h>

struct nvmem_reboot_mode {
struct reboot_mode_driver reboot;
@@ -19,12 +20,22 @@ struct nvmem_reboot_mode {
static int nvmem_reboot_mode_write(struct reboot_mode_driver *reboot,
unsigned int magic)
{
- int ret;
struct nvmem_reboot_mode *nvmem_rbm;
+ size_t buf_len;
+ void *buf;
+ int ret;

nvmem_rbm = container_of(reboot, struct nvmem_reboot_mode, reboot);

- ret = nvmem_cell_write(nvmem_rbm->cell, &magic, sizeof(magic));
+ buf = nvmem_cell_read(nvmem_rbm->cell, &buf_len);
+ if (IS_ERR(buf))
+ return PTR_ERR(buf);
+ kfree(buf);
+
+ if (buf_len > sizeof(magic))
+ return -EINVAL;
+
+ ret = nvmem_cell_write(nvmem_rbm->cell, &magic, buf_len);
if (ret < 0)
dev_err(reboot->dev, "update reboot mode bits failed\n");

diff --git a/drivers/power/supply/ab8500_charger.c b/drivers/power/supply/ab8500_charger.c
index 5f4537766e5b..1813fbdfa1c1 100644
--- a/drivers/power/supply/ab8500_charger.c
+++ b/drivers/power/supply/ab8500_charger.c
@@ -3466,26 +3466,6 @@ static int ab8500_charger_probe(struct platform_device *pdev)
return ret;
}

- /* Request interrupts */
- for (i = 0; i < ARRAY_SIZE(ab8500_charger_irq); i++) {
- irq = platform_get_irq_byname(pdev, ab8500_charger_irq[i].name);
- if (irq < 0)
- return irq;
-
- ret = devm_request_threaded_irq(dev,
- irq, NULL, ab8500_charger_irq[i].isr,
- IRQF_SHARED | IRQF_NO_SUSPEND | IRQF_ONESHOT,
- ab8500_charger_irq[i].name, di);
-
- if (ret != 0) {
- dev_err(dev, "failed to request %s IRQ %d: %d\n"
- , ab8500_charger_irq[i].name, irq, ret);
- return ret;
- }
- dev_dbg(dev, "Requested %s IRQ %d: %d\n",
- ab8500_charger_irq[i].name, irq, ret);
- }
-
/* initialize lock */
spin_lock_init(&di->usb_state.usb_lock);
mutex_init(&di->usb_ipt_crnt_lock);
@@ -3614,6 +3594,26 @@ static int ab8500_charger_probe(struct platform_device *pdev)
return PTR_ERR(di->usb_chg.psy);
}

+ /* Request interrupts */
+ for (i = 0; i < ARRAY_SIZE(ab8500_charger_irq); i++) {
+ irq = platform_get_irq_byname(pdev, ab8500_charger_irq[i].name);
+ if (irq < 0)
+ return irq;
+
+ ret = devm_request_threaded_irq(dev,
+ irq, NULL, ab8500_charger_irq[i].isr,
+ IRQF_SHARED | IRQF_NO_SUSPEND | IRQF_ONESHOT,
+ ab8500_charger_irq[i].name, di);
+
+ if (ret != 0) {
+ dev_err(dev, "failed to request %s IRQ %d: %d\n"
+ , ab8500_charger_irq[i].name, irq, ret);
+ return ret;
+ }
+ dev_dbg(dev, "Requested %s IRQ %d: %d\n",
+ ab8500_charger_irq[i].name, irq, ret);
+ }
+
/*
* Check what battery we have, since we always have the USB
* psy, use that as a handle.
diff --git a/drivers/power/supply/act8945a_charger.c b/drivers/power/supply/act8945a_charger.c
index 3901a02f326a..9dec4486b143 100644
--- a/drivers/power/supply/act8945a_charger.c
+++ b/drivers/power/supply/act8945a_charger.c
@@ -597,14 +597,6 @@ static int act8945a_charger_probe(struct platform_device *pdev)
return irq ?: -ENXIO;
}

- ret = devm_request_irq(&pdev->dev, irq, act8945a_status_changed,
- IRQF_TRIGGER_FALLING, "act8945a_interrupt",
- charger);
- if (ret) {
- dev_err(&pdev->dev, "failed to request nIRQ pin IRQ\n");
- return ret;
- }
-
charger->desc.name = "act8945a-charger";
charger->desc.get_property = act8945a_charger_get_property;
charger->desc.properties = act8945a_charger_props;
@@ -625,6 +617,14 @@ static int act8945a_charger_probe(struct platform_device *pdev)
return PTR_ERR(charger->psy);
}

+ ret = devm_request_irq(&pdev->dev, irq, act8945a_status_changed,
+ IRQF_TRIGGER_FALLING, "act8945a_interrupt",
+ charger);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to request nIRQ pin IRQ\n");
+ return ret;
+ }
+
platform_set_drvdata(pdev, charger);

INIT_WORK(&charger->work, act8945a_work);
diff --git a/drivers/power/supply/bq256xx_charger.c b/drivers/power/supply/bq256xx_charger.c
index ae14162f017a..d3de4f8b80db 100644
--- a/drivers/power/supply/bq256xx_charger.c
+++ b/drivers/power/supply/bq256xx_charger.c
@@ -1741,6 +1741,12 @@ static int bq256xx_probe(struct i2c_client *client)
usb_register_notifier(bq->usb3_phy, &bq->usb_nb);
}

+ ret = bq256xx_power_supply_init(bq, &psy_cfg, dev);
+ if (ret) {
+ dev_err(dev, "Failed to register power supply\n");
+ return ret;
+ }
+
if (client->irq) {
ret = devm_request_threaded_irq(dev, client->irq, NULL,
bq256xx_irq_handler_thread,
@@ -1753,12 +1759,6 @@ static int bq256xx_probe(struct i2c_client *client)
}
}

- ret = bq256xx_power_supply_init(bq, &psy_cfg, dev);
- if (ret) {
- dev_err(dev, "Failed to register power supply\n");
- return ret;
- }
-
ret = bq256xx_hw_init(bq);
if (ret) {
dev_err(dev, "Cannot initialize the chip.\n");
diff --git a/drivers/power/supply/bq25980_charger.c b/drivers/power/supply/bq25980_charger.c
index 723858d62d14..73f06f09f134 100644
--- a/drivers/power/supply/bq25980_charger.c
+++ b/drivers/power/supply/bq25980_charger.c
@@ -1241,6 +1241,12 @@ static int bq25980_probe(struct i2c_client *client)
return ret;
}

+ ret = bq25980_power_supply_init(bq, dev);
+ if (ret) {
+ dev_err(dev, "Failed to register power supply\n");
+ return ret;
+ }
+
if (client->irq) {
ret = devm_request_threaded_irq(dev, client->irq, NULL,
bq25980_irq_handler_thread,
@@ -1251,12 +1257,6 @@ static int bq25980_probe(struct i2c_client *client)
return ret;
}

- ret = bq25980_power_supply_init(bq, dev);
- if (ret) {
- dev_err(dev, "Failed to register power supply\n");
- return ret;
- }
-
ret = bq25980_hw_init(bq);
if (ret) {
dev_err(dev, "Cannot initialize the chip.\n");
diff --git a/drivers/power/supply/bq27xxx_battery.c b/drivers/power/supply/bq27xxx_battery.c
index 19445e39651c..45f0e39b8c2d 100644
--- a/drivers/power/supply/bq27xxx_battery.c
+++ b/drivers/power/supply/bq27xxx_battery.c
@@ -1172,7 +1172,7 @@ static inline int bq27xxx_write(struct bq27xxx_device_info *di, int reg_index,
return -EINVAL;

if (!di->bus.write)
- return -EPERM;
+ return -EOPNOTSUPP;

ret = di->bus.write(di, di->regs[reg_index], value, single);
if (ret < 0)
@@ -1191,7 +1191,7 @@ static inline int bq27xxx_read_block(struct bq27xxx_device_info *di, int reg_ind
return -EINVAL;

if (!di->bus.read_bulk)
- return -EPERM;
+ return -EOPNOTSUPP;

ret = di->bus.read_bulk(di, di->regs[reg_index], data, len);
if (ret < 0)
@@ -1210,7 +1210,7 @@ static inline int bq27xxx_write_block(struct bq27xxx_device_info *di, int reg_in
return -EINVAL;

if (!di->bus.write_bulk)
- return -EPERM;
+ return -EOPNOTSUPP;

ret = di->bus.write_bulk(di, di->regs[reg_index], data, len);
if (ret < 0)
diff --git a/drivers/power/supply/cpcap-battery.c b/drivers/power/supply/cpcap-battery.c
index 8106d1edcbc2..507fdc1c866d 100644
--- a/drivers/power/supply/cpcap-battery.c
+++ b/drivers/power/supply/cpcap-battery.c
@@ -1122,10 +1122,6 @@ static int cpcap_battery_probe(struct platform_device *pdev)

platform_set_drvdata(pdev, ddata);

- error = cpcap_battery_init_interrupts(pdev, ddata);
- if (error)
- return error;
-
error = cpcap_battery_init_iio(ddata);
if (error)
return error;
@@ -1142,6 +1138,10 @@ static int cpcap_battery_probe(struct platform_device *pdev)
return error;
}

+ error = cpcap_battery_init_interrupts(pdev, ddata);
+ if (error)
+ return error;
+
atomic_set(&ddata->active, 1);

error = cpcap_battery_calibrate(ddata);
diff --git a/drivers/power/supply/goldfish_battery.c b/drivers/power/supply/goldfish_battery.c
index 479195e35d73..5aa24e4dc445 100644
--- a/drivers/power/supply/goldfish_battery.c
+++ b/drivers/power/supply/goldfish_battery.c
@@ -224,12 +224,6 @@ static int goldfish_battery_probe(struct platform_device *pdev)
if (data->irq < 0)
return -ENODEV;

- ret = devm_request_irq(&pdev->dev, data->irq,
- goldfish_battery_interrupt,
- IRQF_SHARED, pdev->name, data);
- if (ret)
- return ret;
-
psy_cfg.drv_data = data;

data->ac = devm_power_supply_register(&pdev->dev,
@@ -244,6 +238,12 @@ static int goldfish_battery_probe(struct platform_device *pdev)
if (IS_ERR(data->battery))
return PTR_ERR(data->battery);

+ ret = devm_request_irq(&pdev->dev, data->irq,
+ goldfish_battery_interrupt,
+ IRQF_SHARED, pdev->name, data);
+ if (ret)
+ return ret;
+
GOLDFISH_BATTERY_WRITE(data, BATTERY_INT_ENABLE, BATTERY_INT_MASK);
return 0;
}
diff --git a/drivers/power/supply/pm8916_bms_vm.c b/drivers/power/supply/pm8916_bms_vm.c
index 5120be086e6f..de5d571c03e2 100644
--- a/drivers/power/supply/pm8916_bms_vm.c
+++ b/drivers/power/supply/pm8916_bms_vm.c
@@ -167,15 +167,6 @@ static int pm8916_bms_vm_battery_probe(struct platform_device *pdev)
if (ret < 0)
return -EINVAL;

- irq = platform_get_irq_byname(pdev, "fifo");
- if (irq < 0)
- return irq;
-
- ret = devm_request_threaded_irq(dev, irq, NULL, pm8916_bms_vm_fifo_update_done_irq,
- IRQF_ONESHOT, "pm8916_vm_bms", bat);
- if (ret)
- return ret;
-
ret = regmap_bulk_read(bat->regmap, bat->reg + PM8916_PERPH_TYPE, &tmp, 2);
if (ret)
goto comm_error;
@@ -220,6 +211,15 @@ static int pm8916_bms_vm_battery_probe(struct platform_device *pdev)
if (ret)
return dev_err_probe(dev, ret, "Unable to get battery info\n");

+ irq = platform_get_irq_byname(pdev, "fifo");
+ if (irq < 0)
+ return irq;
+
+ ret = devm_request_threaded_irq(dev, irq, NULL, pm8916_bms_vm_fifo_update_done_irq,
+ IRQF_ONESHOT, "pm8916_vm_bms", bat);
+ if (ret)
+ return ret;
+
platform_set_drvdata(pdev, bat);

return 0;
diff --git a/drivers/power/supply/pm8916_lbc.c b/drivers/power/supply/pm8916_lbc.c
index c74b75b1b267..6b631012a795 100644
--- a/drivers/power/supply/pm8916_lbc.c
+++ b/drivers/power/supply/pm8916_lbc.c
@@ -274,15 +274,6 @@ static int pm8916_lbc_charger_probe(struct platform_device *pdev)
return dev_err_probe(dev, -EINVAL,
"Wrong amount of reg values: %d (4 expected)\n", len);

- irq = platform_get_irq_byname(pdev, "usb_vbus");
- if (irq < 0)
- return irq;
-
- ret = devm_request_threaded_irq(dev, irq, NULL, pm8916_lbc_charger_state_changed_irq,
- IRQF_ONESHOT, "pm8916_lbc", chg);
- if (ret)
- return ret;
-
ret = device_property_read_u32_array(dev, "reg", chg->reg, len);
if (ret)
return ret;
@@ -332,6 +323,10 @@ static int pm8916_lbc_charger_probe(struct platform_device *pdev)
if (ret)
return dev_err_probe(dev, ret, "Unable to get battery info\n");

+ irq = platform_get_irq_byname(pdev, "usb_vbus");
+ if (irq < 0)
+ return irq;
+
chg->edev = devm_extcon_dev_allocate(dev, pm8916_lbc_charger_cable);
if (IS_ERR(chg->edev))
return PTR_ERR(chg->edev);
@@ -340,6 +335,11 @@ static int pm8916_lbc_charger_probe(struct platform_device *pdev)
if (ret < 0)
return dev_err_probe(dev, ret, "failed to register extcon device\n");

+ ret = devm_request_threaded_irq(dev, irq, NULL, pm8916_lbc_charger_state_changed_irq,
+ IRQF_ONESHOT, "pm8916_lbc", chg);
+ if (ret)
+ return ret;
+
ret = regmap_read(chg->regmap, chg->reg[LBC_USB] + PM8916_INT_RT_STS, &tmp);
if (ret)
goto comm_error;
diff --git a/drivers/power/supply/qcom_battmgr.c b/drivers/power/supply/qcom_battmgr.c
index e6f01e0122e1..ff77dba29a3e 100644
--- a/drivers/power/supply/qcom_battmgr.c
+++ b/drivers/power/supply/qcom_battmgr.c
@@ -1244,7 +1244,8 @@ static unsigned int qcom_battmgr_sc8280xp_parse_technology(const char *chemistry
if ((!strncmp(chemistry, "LIO", BATTMGR_CHEMISTRY_LEN)) ||
(!strncmp(chemistry, "OOI", BATTMGR_CHEMISTRY_LEN)))
return POWER_SUPPLY_TECHNOLOGY_LION;
- if (!strncmp(chemistry, "LIP", BATTMGR_CHEMISTRY_LEN))
+ if (!strncmp(chemistry, "LIP", BATTMGR_CHEMISTRY_LEN) ||
+ !strncmp(chemistry, "LiP", BATTMGR_CHEMISTRY_LEN))
return POWER_SUPPLY_TECHNOLOGY_LIPO;

pr_err("Unknown battery technology '%s'\n", chemistry);
diff --git a/drivers/power/supply/rt9455_charger.c b/drivers/power/supply/rt9455_charger.c
index 1ffe7f02932f..5130d2395e88 100644
--- a/drivers/power/supply/rt9455_charger.c
+++ b/drivers/power/supply/rt9455_charger.c
@@ -1663,6 +1663,15 @@ static int rt9455_probe(struct i2c_client *client)
rt9455_charger_config.supplied_to = rt9455_charger_supplied_to;
rt9455_charger_config.num_supplicants =
ARRAY_SIZE(rt9455_charger_supplied_to);
+
+ info->charger = devm_power_supply_register(dev, &rt9455_charger_desc,
+ &rt9455_charger_config);
+ if (IS_ERR(info->charger)) {
+ dev_err(dev, "Failed to register charger\n");
+ ret = PTR_ERR(info->charger);
+ goto put_usb_notifier;
+ }
+
ret = devm_request_threaded_irq(dev, client->irq, NULL,
rt9455_irq_handler_thread,
IRQF_TRIGGER_LOW | IRQF_ONESHOT,
@@ -1678,14 +1687,6 @@ static int rt9455_probe(struct i2c_client *client)
goto put_usb_notifier;
}

- info->charger = devm_power_supply_register(dev, &rt9455_charger_desc,
- &rt9455_charger_config);
- if (IS_ERR(info->charger)) {
- dev_err(dev, "Failed to register charger\n");
- ret = PTR_ERR(info->charger);
- goto put_usb_notifier;
- }
-
return 0;

put_usb_notifier:
diff --git a/drivers/power/supply/sbs-battery.c b/drivers/power/supply/sbs-battery.c
index 943c82ee978f..43c48196c167 100644
--- a/drivers/power/supply/sbs-battery.c
+++ b/drivers/power/supply/sbs-battery.c
@@ -1174,24 +1174,6 @@ static int sbs_probe(struct i2c_client *client)

i2c_set_clientdata(client, chip);

- if (!chip->gpio_detect)
- goto skip_gpio;
-
- irq = gpiod_to_irq(chip->gpio_detect);
- if (irq <= 0) {
- dev_warn(&client->dev, "Failed to get gpio as irq: %d\n", irq);
- goto skip_gpio;
- }
-
- rc = devm_request_threaded_irq(&client->dev, irq, NULL, sbs_irq,
- IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
- dev_name(&client->dev), chip);
- if (rc) {
- dev_warn(&client->dev, "Failed to request irq: %d\n", rc);
- goto skip_gpio;
- }
-
-skip_gpio:
/*
* Before we register, we might need to make sure we can actually talk
* to the battery.
@@ -1217,6 +1199,24 @@ static int sbs_probe(struct i2c_client *client)
return dev_err_probe(&client->dev, PTR_ERR(chip->power_supply),
"Failed to register power supply\n");

+ if (!chip->gpio_detect)
+ goto out;
+
+ irq = gpiod_to_irq(chip->gpio_detect);
+ if (irq <= 0) {
+ dev_warn(&client->dev, "Failed to get gpio as irq: %d\n", irq);
+ goto out;
+ }
+
+ rc = devm_request_threaded_irq(&client->dev, irq, NULL, sbs_irq,
+ IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+ dev_name(&client->dev), chip);
+ if (rc) {
+ dev_warn(&client->dev, "Failed to request irq: %d\n", rc);
+ goto out;
+ }
+
+out:
dev_info(&client->dev,
"%s: battery gas gauge device registered\n", client->name);

diff --git a/drivers/power/supply/wm97xx_battery.c b/drivers/power/supply/wm97xx_battery.c
index b3b0c37a9dd2..f00722c88c6f 100644
--- a/drivers/power/supply/wm97xx_battery.c
+++ b/drivers/power/supply/wm97xx_battery.c
@@ -178,12 +178,6 @@ static int wm97xx_bat_probe(struct platform_device *dev)
"failed to get charge GPIO\n");
if (charge_gpiod) {
gpiod_set_consumer_name(charge_gpiod, "BATT CHRG");
- ret = request_irq(gpiod_to_irq(charge_gpiod),
- wm97xx_chrg_irq, 0,
- "AC Detect", dev);
- if (ret)
- return dev_err_probe(&dev->dev, ret,
- "failed to request GPIO irq\n");
props++; /* POWER_SUPPLY_PROP_STATUS */
}

@@ -199,10 +193,8 @@ static int wm97xx_bat_probe(struct platform_device *dev)
props++; /* POWER_SUPPLY_PROP_VOLTAGE_MIN */

prop = kcalloc(props, sizeof(*prop), GFP_KERNEL);
- if (!prop) {
- ret = -ENOMEM;
- goto err3;
- }
+ if (!prop)
+ return -ENOMEM;

prop[i++] = POWER_SUPPLY_PROP_PRESENT;
if (charge_gpiod)
@@ -236,15 +228,27 @@ static int wm97xx_bat_probe(struct platform_device *dev)
schedule_work(&bat_work);
} else {
ret = PTR_ERR(bat_psy);
- goto err4;
+ goto free;
+ }
+
+ if (charge_gpiod) {
+ ret = request_irq(gpiod_to_irq(charge_gpiod), wm97xx_chrg_irq,
+ 0, "AC Detect", dev);
+ if (ret) {
+ dev_err_probe(&dev->dev, ret,
+ "failed to request GPIO irq\n");
+ goto unregister;
+ }
}

return 0;
-err4:
+
+unregister:
+ power_supply_unregister(bat_psy);
+
+free:
kfree(prop);
-err3:
- if (charge_gpiod)
- free_irq(gpiod_to_irq(charge_gpiod), dev);
+
return ret;
}

diff --git a/drivers/powercap/intel_rapl_tpmi.c b/drivers/powercap/intel_rapl_tpmi.c
index 82201bf4685d..34c0bd1edd61 100644
--- a/drivers/powercap/intel_rapl_tpmi.c
+++ b/drivers/powercap/intel_rapl_tpmi.c
@@ -157,7 +157,7 @@ static int parse_one_domain(struct tpmi_rapl_package *trp, u32 offset)
tpmi_domain_flags = tpmi_domain_header >> 32 & 0xffff;

if (tpmi_domain_version == TPMI_VERSION_INVALID) {
- pr_warn(FW_BUG "Invalid version\n");
+ pr_debug("Invalid version, other instances may be valid\n");
return -ENODEV;
}

diff --git a/drivers/pwm/pwm-tiehrpwm.c b/drivers/pwm/pwm-tiehrpwm.c
index 7a86cb090f76..2533c95b0ba9 100644
--- a/drivers/pwm/pwm-tiehrpwm.c
+++ b/drivers/pwm/pwm-tiehrpwm.c
@@ -237,8 +237,6 @@ static int ehrpwm_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
if (period_cycles < 1)
period_cycles = 1;

- pm_runtime_get_sync(pwmchip_parent(chip));
-
/* Update clock prescaler values */
ehrpwm_modify(pc->mmio_base, TBCTL, TBCTL_CLKDIV_MASK, tb_divval);

@@ -290,8 +288,6 @@ static int ehrpwm_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
if (!(duty_cycles > period_cycles))
ehrpwm_write(pc->mmio_base, cmp_reg, duty_cycles);

- pm_runtime_put_sync(pwmchip_parent(chip));
-
return 0;
}

@@ -378,6 +374,8 @@ static int ehrpwm_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
int err;
bool enabled = pwm->state.enabled;

+ guard(pm_runtime_active)(pwmchip_parent(chip));
+
if (state->polarity != pwm->state.polarity) {
if (enabled) {
ehrpwm_pwm_disable(chip, pwm);
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index b38b087eccfd..17c60d9547dc 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -1431,6 +1431,33 @@ static int set_machine_constraints(struct regulator_dev *rdev)
int ret = 0;
const struct regulator_ops *ops = rdev->desc->ops;

+ /*
+ * If there is no mechanism for controlling the regulator then
+ * flag it as always_on so we don't end up duplicating checks
+ * for this so much. Note that we could control the state of
+ * a supply to control the output on a regulator that has no
+ * direct control.
+ */
+ if (!rdev->ena_pin && !ops->enable) {
+ if (rdev->supply_name && !rdev->supply)
+ return -EPROBE_DEFER;
+
+ if (rdev->supply)
+ rdev->constraints->always_on =
+ rdev->supply->rdev->constraints->always_on;
+ else
+ rdev->constraints->always_on = true;
+ }
+
+ /*
+ * If we want to enable this regulator, make sure that we know the
+ * supplying regulator.
+ */
+ if (rdev->constraints->always_on || rdev->constraints->boot_on) {
+ if (rdev->supply_name && !rdev->supply)
+ return -EPROBE_DEFER;
+ }
+
ret = machine_constraints_voltage(rdev, rdev->constraints);
if (ret != 0)
return ret;
@@ -1596,37 +1623,15 @@ static int set_machine_constraints(struct regulator_dev *rdev)
}
}

- /*
- * If there is no mechanism for controlling the regulator then
- * flag it as always_on so we don't end up duplicating checks
- * for this so much. Note that we could control the state of
- * a supply to control the output on a regulator that has no
- * direct control.
- */
- if (!rdev->ena_pin && !ops->enable) {
- if (rdev->supply_name && !rdev->supply)
- return -EPROBE_DEFER;
-
- if (rdev->supply)
- rdev->constraints->always_on =
- rdev->supply->rdev->constraints->always_on;
- else
- rdev->constraints->always_on = true;
- }
-
/* If the constraints say the regulator should be on at this point
* and we have control then make sure it is enabled.
*/
if (rdev->constraints->always_on || rdev->constraints->boot_on) {
bool supply_enabled = false;

- /* If we want to enable this regulator, make sure that we know
- * the supplying regulator.
- */
- if (rdev->supply_name && !rdev->supply)
- return -EPROBE_DEFER;
-
- /* If supplying regulator has already been enabled,
+ /* We have ensured a potential supply has been resolved above.
+ *
+ * If supplying regulator has already been enabled,
* it's not intended to have use_count increment
* when rdev is only boot-on.
*/
diff --git a/drivers/remoteproc/imx_dsp_rproc.c b/drivers/remoteproc/imx_dsp_rproc.c
index 6e78a01755c7..e61a08df113e 100644
--- a/drivers/remoteproc/imx_dsp_rproc.c
+++ b/drivers/remoteproc/imx_dsp_rproc.c
@@ -1026,9 +1026,11 @@ static int imx_dsp_rproc_load(struct rproc *rproc, const struct firmware *fw)
* Clear buffers after pm rumtime for internal ocram is not
* accessible if power and clock are not enabled.
*/
- list_for_each_entry(carveout, &rproc->carveouts, node) {
- if (carveout->va)
- memset(carveout->va, 0, carveout->len);
+ if (rproc->state == RPROC_OFFLINE) {
+ list_for_each_entry(carveout, &rproc->carveouts, node) {
+ if (carveout->va)
+ memset(carveout->va, 0, carveout->len);
+ }
}

ret = imx_dsp_rproc_elf_load_segments(rproc, fw);
diff --git a/drivers/reset/Kconfig b/drivers/reset/Kconfig
index 78b7078478d4..b3b9e0f9d8c4 100644
--- a/drivers/reset/Kconfig
+++ b/drivers/reset/Kconfig
@@ -150,7 +150,7 @@ config RESET_K210
config RESET_K230
tristate "Reset controller driver for Canaan Kendryte K230 SoC"
depends on ARCH_CANAAN || COMPILE_TEST
- depends on OF
+ default ARCH_CANAAN
help
Support for the Canaan Kendryte K230 RISC-V SoC reset controller.
Say Y if you want to control reset signals provided by this
diff --git a/drivers/rtc/rtc-amlogic-a4.c b/drivers/rtc/rtc-amlogic-a4.c
index a993d35e1d6b..d766055d9584 100644
--- a/drivers/rtc/rtc-amlogic-a4.c
+++ b/drivers/rtc/rtc-amlogic-a4.c
@@ -371,7 +371,7 @@ static int aml_rtc_probe(struct platform_device *pdev)
}

ret = devm_request_irq(dev, rtc->irq, aml_rtc_handler,
- IRQF_ONESHOT, "aml-rtc alarm", rtc);
+ 0, "aml-rtc alarm", rtc);
if (ret) {
dev_err_probe(dev, ret, "IRQ%d request failed, ret = %d\n",
rtc->irq, ret);
diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index be78a57f9bfd..8a70596a5544 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -236,7 +236,7 @@ struct subchannel *css_alloc_subchannel(struct subchannel_id schid,
return sch;

err:
- kfree(sch);
+ put_device(&sch->dev);
return ERR_PTR(ret);
}

diff --git a/drivers/scsi/csiostor/csio_scsi.c b/drivers/scsi/csiostor/csio_scsi.c
index 34bde6650fae..356a7c577ec3 100644
--- a/drivers/scsi/csiostor/csio_scsi.c
+++ b/drivers/scsi/csiostor/csio_scsi.c
@@ -2074,7 +2074,7 @@ csio_eh_lun_reset_handler(struct scsi_cmnd *cmnd)
struct csio_scsi_level_data sld;

if (!rn)
- goto fail;
+ goto fail_ret;

csio_dbg(hw, "Request to reset LUN:%llu (ssni:0x%x tgtid:%d)\n",
cmnd->device->lun, rn->flowid, rn->scsi_id);
@@ -2220,6 +2220,7 @@ csio_eh_lun_reset_handler(struct scsi_cmnd *cmnd)
csio_put_scsi_ioreq_lock(hw, scsim, ioreq);
fail:
CSIO_INC_STATS(rn, n_lun_rst_fail);
+fail_ret:
return FAILED;
}

diff --git a/drivers/scsi/elx/efct/efct_driver.c b/drivers/scsi/elx/efct/efct_driver.c
index 1bd42f7db177..528399f725d4 100644
--- a/drivers/scsi/elx/efct/efct_driver.c
+++ b/drivers/scsi/elx/efct/efct_driver.c
@@ -415,12 +415,6 @@ efct_intr_thread(int irq, void *handle)
return IRQ_HANDLED;
}

-static irqreturn_t
-efct_intr_msix(int irq, void *handle)
-{
- return IRQ_WAKE_THREAD;
-}
-
static int
efct_setup_msix(struct efct *efct, u32 num_intrs)
{
@@ -450,7 +444,7 @@ efct_setup_msix(struct efct *efct, u32 num_intrs)
intr_ctx->index = i;

rc = request_threaded_irq(pci_irq_vector(efct->pci, i),
- efct_intr_msix, efct_intr_thread, 0,
+ NULL, efct_intr_thread, IRQF_ONESHOT,
EFCT_DRIVER_NAME, intr_ctx);
if (rc) {
dev_err(&efct->pci->dev,
diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index 98e93900254c..5a6e1bb57e7c 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -1241,7 +1241,8 @@ static inline int pqi_report_phys_luns(struct pqi_ctrl_info *ctrl_info, void **b
dev_err(&ctrl_info->pci_dev->dev,
"RPL returned unsupported data format %u\n",
rpl_response_format);
- return -EINVAL;
+ rc = -EINVAL;
+ goto out_free_rpl_list;
} else {
dev_warn(&ctrl_info->pci_dev->dev,
"RPL returned extended format 2 instead of 4\n");
@@ -1253,8 +1254,10 @@ static inline int pqi_report_phys_luns(struct pqi_ctrl_info *ctrl_info, void **b

rpl_16byte_wwid_list = kmalloc(struct_size(rpl_16byte_wwid_list, lun_entries,
num_physicals), GFP_KERNEL);
- if (!rpl_16byte_wwid_list)
- return -ENOMEM;
+ if (!rpl_16byte_wwid_list) {
+ rc = -ENOMEM;
+ goto out_free_rpl_list;
+ }

put_unaligned_be32(num_physicals * sizeof(struct report_phys_lun_16byte_wwid),
&rpl_16byte_wwid_list->header.list_length);
@@ -1275,6 +1278,10 @@ static inline int pqi_report_phys_luns(struct pqi_ctrl_info *ctrl_info, void **b
*buffer = rpl_16byte_wwid_list;

return 0;
+
+out_free_rpl_list:
+ kfree(rpl_list);
+ return rc;
}

static inline int pqi_report_logical_luns(struct pqi_ctrl_info *ctrl_info, void **buffer)
diff --git a/drivers/soc/mediatek/mtk-svs.c b/drivers/soc/mediatek/mtk-svs.c
index f45537546553..99edecb204f2 100644
--- a/drivers/soc/mediatek/mtk-svs.c
+++ b/drivers/soc/mediatek/mtk-svs.c
@@ -9,6 +9,7 @@
#include <linux/bits.h>
#include <linux/clk.h>
#include <linux/completion.h>
+#include <linux/cleanup.h>
#include <linux/cpu.h>
#include <linux/cpuidle.h>
#include <linux/debugfs.h>
@@ -789,7 +790,7 @@ static ssize_t svs_enable_debug_write(struct file *filp,
struct svs_bank *svsb = file_inode(filp)->i_private;
struct svs_platform *svsp = dev_get_drvdata(svsb->dev);
int enabled, ret;
- char *buf = NULL;
+ char *buf __free(kfree) = NULL;

if (count >= PAGE_SIZE)
return -EINVAL;
@@ -807,8 +808,6 @@ static ssize_t svs_enable_debug_write(struct file *filp,
svsb->mode_support = SVSB_MODE_ALL_DISABLE;
}

- kfree(buf);
-
return count;
}

diff --git a/drivers/soc/qcom/cmd-db.c b/drivers/soc/qcom/cmd-db.c
index ae66c2623d25..84a75d8c4b70 100644
--- a/drivers/soc/qcom/cmd-db.c
+++ b/drivers/soc/qcom/cmd-db.c
@@ -349,15 +349,16 @@ static int cmd_db_dev_probe(struct platform_device *pdev)
return -EINVAL;
}

- cmd_db_header = memremap(rmem->base, rmem->size, MEMREMAP_WC);
- if (!cmd_db_header) {
- ret = -ENOMEM;
+ cmd_db_header = devm_memremap(&pdev->dev, rmem->base, rmem->size, MEMREMAP_WC);
+ if (IS_ERR(cmd_db_header)) {
+ ret = PTR_ERR(cmd_db_header);
cmd_db_header = NULL;
return ret;
}

if (!cmd_db_magic_matches(cmd_db_header)) {
dev_err(&pdev->dev, "Invalid Command DB Magic\n");
+ cmd_db_header = NULL;
return -EINVAL;
}

diff --git a/drivers/soc/qcom/smem.c b/drivers/soc/qcom/smem.c
index f1d1b5aa5e4d..39177aa5793a 100644
--- a/drivers/soc/qcom/smem.c
+++ b/drivers/soc/qcom/smem.c
@@ -1215,7 +1215,9 @@ static int qcom_smem_probe(struct platform_device *pdev)
smem->item_count = qcom_smem_get_item_count(smem);
break;
case SMEM_GLOBAL_HEAP_VERSION:
- qcom_smem_map_global(smem, size);
+ ret = qcom_smem_map_global(smem, size);
+ if (ret < 0)
+ return ret;
smem->item_count = SMEM_ITEM_COUNT;
break;
default:
diff --git a/drivers/soundwire/Kconfig b/drivers/soundwire/Kconfig
index ad56393e4c93..196a7daaabdb 100644
--- a/drivers/soundwire/Kconfig
+++ b/drivers/soundwire/Kconfig
@@ -40,6 +40,7 @@ config SOUNDWIRE_INTEL
select AUXILIARY_BUS
depends on ACPI && SND_SOC
depends on SND_SOC_SOF_HDA_MLINK || !SND_SOC_SOF_HDA_MLINK
+ depends on SND_HDA_CORE || !SND_HDA_ALIGNED_MMIO
help
SoundWire Intel Master driver.
If you have an Intel platform which has a SoundWire Master then
diff --git a/drivers/spi/spi-wpcm-fiu.c b/drivers/spi/spi-wpcm-fiu.c
index a9aee2a6c7dc..c47b56f0933f 100644
--- a/drivers/spi/spi-wpcm-fiu.c
+++ b/drivers/spi/spi-wpcm-fiu.c
@@ -459,11 +459,11 @@ static int wpcm_fiu_probe(struct platform_device *pdev)

res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "memory");
fiu->memory = devm_ioremap_resource(dev, res);
- fiu->memory_size = min_t(size_t, resource_size(res), MAX_MEMORY_SIZE_TOTAL);
if (IS_ERR(fiu->memory))
return dev_err_probe(dev, PTR_ERR(fiu->memory),
"Failed to map flash memory window\n");

+ fiu->memory_size = min_t(size_t, resource_size(res), MAX_MEMORY_SIZE_TOTAL);
fiu->shm_regmap = syscon_regmap_lookup_by_phandle_optional(dev->of_node, "nuvoton,shm");

wpcm_fiu_hw_init(fiu);
diff --git a/drivers/staging/gpib/common/iblib.c b/drivers/staging/gpib/common/iblib.c
index 7cbb6a467177..b672dd6aad25 100644
--- a/drivers/staging/gpib/common/iblib.c
+++ b/drivers/staging/gpib/common/iblib.c
@@ -227,11 +227,10 @@ int ibonline(struct gpib_board *board)
#ifndef CONFIG_NIOS2
board->autospoll_task = kthread_run(&autospoll_thread, board,
"gpib%d_autospoll_kthread", board->minor);
- retval = IS_ERR(board->autospoll_task);
- if (retval) {
+ if (IS_ERR(board->autospoll_task)) {
dev_err(board->gpib_dev, "failed to create autospoll thread\n");
board->interface->detach(board);
- return retval;
+ return PTR_ERR(board->autospoll_task);
}
#endif
board->online = 1;
diff --git a/drivers/staging/gpib/ni_usb/ni_usb_gpib.c b/drivers/staging/gpib/ni_usb/ni_usb_gpib.c
index 1f8412de9fa3..b6fddb437f55 100644
--- a/drivers/staging/gpib/ni_usb/ni_usb_gpib.c
+++ b/drivers/staging/gpib/ni_usb/ni_usb_gpib.c
@@ -566,7 +566,7 @@ static int ni_usb_write_registers(struct ni_usb_priv *ni_priv,
retval, bytes_read);
ni_usb_dump_raw_block(in_data, bytes_read);
kfree(in_data);
- return retval;
+ return retval ?: -EINVAL;
}

mutex_unlock(&ni_priv->addressed_transfer_lock);
@@ -1780,7 +1780,7 @@ static int ni_usb_setup_init(struct gpib_board *board, struct ni_usb_register *w
i++;
if (i > NUM_INIT_WRITES) {
dev_err(&usb_dev->dev, "bug!, buffer overrun, i=%i\n", i);
- return 0;
+ return -EINVAL;
}
return i;
}
@@ -1799,10 +1799,12 @@ static int ni_usb_init(struct gpib_board *board)
return -ENOMEM;

writes_len = ni_usb_setup_init(board, writes);
- if (writes_len)
- retval = ni_usb_write_registers(ni_priv, writes, writes_len, &ibsta);
- else
- return -EFAULT;
+ if (writes_len < 0) {
+ kfree(writes);
+ return writes_len;
+ }
+
+ retval = ni_usb_write_registers(ni_priv, writes, writes_len, &ibsta);
kfree(writes);
if (retval) {
dev_err(&usb_dev->dev, "register write failed, retval=%i\n", retval);
diff --git a/drivers/staging/greybus/light.c b/drivers/staging/greybus/light.c
index e509fdc715db..38c233a706c4 100644
--- a/drivers/staging/greybus/light.c
+++ b/drivers/staging/greybus/light.c
@@ -1008,14 +1008,18 @@ static int gb_lights_light_config(struct gb_lights *glights, u8 id)
if (!strlen(conf.name))
return -EINVAL;

- light->channels_count = conf.channel_count;
light->name = kstrndup(conf.name, NAMES_MAX, GFP_KERNEL);
if (!light->name)
return -ENOMEM;
- light->channels = kcalloc(light->channels_count,
+ light->channels = kcalloc(conf.channel_count,
sizeof(struct gb_channel), GFP_KERNEL);
if (!light->channels)
return -ENOMEM;
+ /*
+ * Publish channels_count only after channels allocation so cleanup
+ * doesn't walk a NULL channels pointer on allocation failure.
+ */
+ light->channels_count = conf.channel_count;

/* First we collect all the configurations for all channels */
for (i = 0; i < light->channels_count; i++) {
diff --git a/drivers/thermal/intel/x86_pkg_temp_thermal.c b/drivers/thermal/intel/x86_pkg_temp_thermal.c
index 3fc679b6f11b..aab5f9fca9c3 100644
--- a/drivers/thermal/intel/x86_pkg_temp_thermal.c
+++ b/drivers/thermal/intel/x86_pkg_temp_thermal.c
@@ -128,6 +128,9 @@ sys_set_trip_temp(struct thermal_zone_device *tzd,
u32 l, h, mask, shift, intr;
int tj_max, val, ret;

+ if (temp == THERMAL_TEMP_INVALID)
+ temp = 0;
+
tj_max = intel_tcc_get_tjmax(zonedev->cpu);
if (tj_max < 0)
return tj_max;
diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c
index 1a51a4d240ff..b6d0c92f5522 100644
--- a/drivers/thermal/thermal_of.c
+++ b/drivers/thermal/thermal_of.c
@@ -280,10 +280,10 @@ static bool thermal_of_cm_lookup(struct device_node *cm_np,
struct cooling_spec *c)
{
for_each_child_of_node_scoped(cm_np, child) {
- struct device_node *tr_np;
int count, i;

- tr_np = of_parse_phandle(child, "trip", 0);
+ struct device_node *tr_np __free(device_node) =
+ of_parse_phandle(child, "trip", 0);
if (tr_np != trip->priv)
continue;

diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 282116765e64..2b9c8b39d68f 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -486,14 +486,14 @@ config SERIAL_IMX
can enable its onboard serial port by enabling this option.

config SERIAL_IMX_CONSOLE
- tristate "Console on IMX serial port"
+ bool "Console on IMX serial port"
depends on SERIAL_IMX
select SERIAL_CORE_CONSOLE
help
If you have enabled the serial port on the Freescale IMX
- CPU you can make it the console by answering Y/M to this option.
+ CPU you can make it the console by answering Y to this option.

- Even if you say Y/M here, the currently visible virtual console
+ Even if you say Y here, the currently visible virtual console
(/dev/tty0) will still be used as the system console by default, but
you can alter that using a kernel command line option such as
"console=ttymxc0". (Try "man bootparam" or see the documentation of
@@ -671,7 +671,7 @@ config SERIAL_SH_SCI_EARLYCON
default ARCH_RENESAS

config SERIAL_SH_SCI_DMA
- bool "DMA support" if EXPERT
+ bool "Support for DMA on SuperH SCI(F)" if EXPERT
depends on SERIAL_SH_SCI && DMA_ENGINE
default ARCH_RENESAS

diff --git a/drivers/ufs/host/Kconfig b/drivers/ufs/host/Kconfig
index 191fbd799ec5..48ee7e9b665e 100644
--- a/drivers/ufs/host/Kconfig
+++ b/drivers/ufs/host/Kconfig
@@ -72,6 +72,7 @@ config SCSI_UFS_QCOM
config SCSI_UFS_MEDIATEK
tristate "Mediatek specific hooks to UFS controller platform driver"
depends on SCSI_UFSHCD_PLATFORM && ARCH_MEDIATEK
+ depends on PM
depends on RESET_CONTROLLER
select PHY_MTK_UFS
select RESET_TI_SYSCON
diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c
index d0cbd96ad29d..3c63adca561d 100644
--- a/drivers/ufs/host/ufs-mediatek.c
+++ b/drivers/ufs/host/ufs-mediatek.c
@@ -2366,7 +2366,6 @@ static void ufs_mtk_remove(struct platform_device *pdev)
ufshcd_pltfrm_remove(pdev);
}

-#ifdef CONFIG_PM_SLEEP
static int ufs_mtk_system_suspend(struct device *dev)
{
struct ufs_hba *hba = dev_get_drvdata(dev);
@@ -2413,9 +2412,7 @@ static int ufs_mtk_system_resume(struct device *dev)

return ret;
}
-#endif

-#ifdef CONFIG_PM
static int ufs_mtk_runtime_suspend(struct device *dev)
{
struct ufs_hba *hba = dev_get_drvdata(dev);
@@ -2454,13 +2451,10 @@ static int ufs_mtk_runtime_resume(struct device *dev)

return ufshcd_runtime_resume(dev);
}
-#endif

static const struct dev_pm_ops ufs_mtk_pm_ops = {
- SET_SYSTEM_SLEEP_PM_OPS(ufs_mtk_system_suspend,
- ufs_mtk_system_resume)
- SET_RUNTIME_PM_OPS(ufs_mtk_runtime_suspend,
- ufs_mtk_runtime_resume, NULL)
+ SYSTEM_SLEEP_PM_OPS(ufs_mtk_system_suspend, ufs_mtk_system_resume)
+ RUNTIME_PM_OPS(ufs_mtk_runtime_suspend, ufs_mtk_runtime_resume, NULL)
.prepare = ufshcd_suspend_prepare,
.complete = ufshcd_resume_complete,
};
@@ -2470,7 +2464,7 @@ static struct platform_driver ufs_mtk_pltform = {
.remove = ufs_mtk_remove,
.driver = {
.name = "ufshcd-mtk",
- .pm = &ufs_mtk_pm_ops,
+ .pm = pm_ptr(&ufs_mtk_pm_ops),
.of_match_table = ufs_mtk_of_match,
},
};
diff --git a/drivers/usb/cdns3/core.c b/drivers/usb/cdns3/core.c
index 1243a5cea91b..f0e32227c0b7 100644
--- a/drivers/usb/cdns3/core.c
+++ b/drivers/usb/cdns3/core.c
@@ -551,7 +551,7 @@ int cdns_resume(struct cdns *cdns)
}
}

- if (cdns->roles[cdns->role]->resume)
+ if (!role_changed && cdns->roles[cdns->role]->resume)
cdns->roles[cdns->role]->resume(cdns, power_lost);

return 0;
diff --git a/drivers/usb/gadget/udc/bdc/bdc_core.c b/drivers/usb/gadget/udc/bdc/bdc_core.c
index 5c3d8b64c0e7..f47aac078f6b 100644
--- a/drivers/usb/gadget/udc/bdc/bdc_core.c
+++ b/drivers/usb/gadget/udc/bdc/bdc_core.c
@@ -35,8 +35,8 @@ static int poll_oip(struct bdc *bdc, u32 usec)
u32 status;
int ret;

- ret = readl_poll_timeout(bdc->regs + BDC_BDCSC, status,
- (BDC_CSTS(status) != BDC_OIP), 10, usec);
+ ret = readl_poll_timeout_atomic(bdc->regs + BDC_BDCSC, status,
+ (BDC_CSTS(status) != BDC_OIP), 10, usec);
if (ret)
dev_err(bdc->dev, "operation timedout BDCSC: 0x%08x\n", status);
else
diff --git a/drivers/usb/typec/tcpm/fusb302.c b/drivers/usb/typec/tcpm/fusb302.c
index 870a71f953f6..19ff8217818e 100644
--- a/drivers/usb/typec/tcpm/fusb302.c
+++ b/drivers/usb/typec/tcpm/fusb302.c
@@ -1756,8 +1756,7 @@ static int fusb302_probe(struct i2c_client *client)
}

ret = request_irq(chip->gpio_int_n_irq, fusb302_irq_intn,
- IRQF_ONESHOT | IRQF_TRIGGER_LOW,
- "fsc_interrupt_int_n", chip);
+ IRQF_TRIGGER_LOW, "fsc_interrupt_int_n", chip);
if (ret < 0) {
dev_err(dev, "cannot request IRQ for GPIO Int_N, ret=%d", ret);
goto tcpm_unregister_port;
diff --git a/drivers/usb/typec/ucsi/Kconfig b/drivers/usb/typec/ucsi/Kconfig
index b812be4d0e67..87dd992a4b9e 100644
--- a/drivers/usb/typec/ucsi/Kconfig
+++ b/drivers/usb/typec/ucsi/Kconfig
@@ -73,7 +73,6 @@ config CROS_EC_UCSI
tristate "UCSI Driver for ChromeOS EC"
depends on MFD_CROS_EC_DEV
depends on CROS_USBPD_NOTIFY
- depends on !EXTCON_TCSS_CROS_EC
default MFD_CROS_EC_DEV
help
This driver enables UCSI support for a ChromeOS EC. The EC is
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index d07093d7cc3f..ed2ae035deb1 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -1169,9 +1169,32 @@ hisi_acc_vfio_pci_get_device_state(struct vfio_device *vdev,
return 0;
}

+static void hisi_acc_vf_pci_reset_prepare(struct pci_dev *pdev)
+{
+ struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_drvdata(pdev);
+ struct hisi_qm *qm = hisi_acc_vdev->pf_qm;
+ struct device *dev = &qm->pdev->dev;
+ u32 delay = 0;
+
+ /* All reset requests need to be queued for processing */
+ while (test_and_set_bit(QM_RESETTING, &qm->misc_ctl)) {
+ msleep(1);
+ if (++delay > QM_RESET_WAIT_TIMEOUT) {
+ dev_err(dev, "reset prepare failed\n");
+ return;
+ }
+ }
+
+ hisi_acc_vdev->set_reset_flag = true;
+}
+
static void hisi_acc_vf_pci_aer_reset_done(struct pci_dev *pdev)
{
struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_drvdata(pdev);
+ struct hisi_qm *qm = hisi_acc_vdev->pf_qm;
+
+ if (hisi_acc_vdev->set_reset_flag)
+ clear_bit(QM_RESETTING, &qm->misc_ctl);

if (hisi_acc_vdev->core_device.vdev.migration_flags !=
VFIO_MIGRATION_STOP_COPY)
@@ -1690,6 +1713,7 @@ static const struct pci_device_id hisi_acc_vfio_pci_table[] = {
MODULE_DEVICE_TABLE(pci, hisi_acc_vfio_pci_table);

static const struct pci_error_handlers hisi_acc_vf_err_handlers = {
+ .reset_prepare = hisi_acc_vf_pci_reset_prepare,
.reset_done = hisi_acc_vf_pci_aer_reset_done,
.error_detected = vfio_pci_core_aer_err_detected,
};
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
index 91002ceeebc1..6253fa074003 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
@@ -27,6 +27,7 @@

#define ERROR_CHECK_TIMEOUT 100
#define CHECK_DELAY_TIME 100
+#define QM_RESET_WAIT_TIMEOUT 60000

#define QM_SQC_VFT_BASE_SHIFT_V2 28
#define QM_SQC_VFT_BASE_MASK_V2 GENMASK(15, 0)
@@ -110,6 +111,7 @@ struct hisi_acc_vf_migration_file {
struct hisi_acc_vf_core_device {
struct vfio_pci_core_device core_device;
u8 match_done;
+ bool set_reset_flag;
/*
* io_base is only valid when dev_opened is true,
* which is protected by open_mutex.
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 5efe7535f41e..085373d71e9c 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -589,6 +589,7 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_enable);

void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)
{
+ struct pci_dev *bridge;
struct pci_dev *pdev = vdev->pdev;
struct vfio_pci_dummy_resource *dummy_res, *tmp;
struct vfio_pci_ioeventfd *ioeventfd, *ioeventfd_tmp;
@@ -695,12 +696,20 @@ void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)
* We can not use the "try" reset interface here, which will
* overwrite the previously restored configuration information.
*/
- if (vdev->reset_works && pci_dev_trylock(pdev)) {
- if (!__pci_reset_function_locked(pdev))
- vdev->needs_reset = false;
- pci_dev_unlock(pdev);
+ if (vdev->reset_works) {
+ bridge = pci_upstream_bridge(pdev);
+ if (bridge && !pci_dev_trylock(bridge))
+ goto out_restore_state;
+ if (pci_dev_trylock(pdev)) {
+ if (!__pci_reset_function_locked(pdev))
+ vdev->needs_reset = false;
+ pci_dev_unlock(pdev);
+ }
+ if (bridge)
+ pci_dev_unlock(bridge);
}

+out_restore_state:
pci_restore_state(pdev);
out:
pci_disable_device(pdev);
diff --git a/drivers/video/backlight/qcom-wled.c b/drivers/video/backlight/qcom-wled.c
index a63bb42c8f8b..8054e4787725 100644
--- a/drivers/video/backlight/qcom-wled.c
+++ b/drivers/video/backlight/qcom-wled.c
@@ -1244,6 +1244,15 @@ static const struct wled_var_cfg wled4_ovp_cfg = {
.size = ARRAY_SIZE(wled4_ovp_values),
};

+static const u32 pmi8994_wled_ovp_values[] = {
+ 31000, 29500, 19400, 17800,
+};
+
+static const struct wled_var_cfg pmi8994_wled_ovp_cfg = {
+ .values = pmi8994_wled_ovp_values,
+ .size = ARRAY_SIZE(pmi8994_wled_ovp_values),
+};
+
static inline u32 wled5_ovp_values_fn(u32 idx)
{
/*
@@ -1357,6 +1366,29 @@ static int wled_configure(struct wled *wled)
},
};

+ const struct wled_u32_opts pmi8994_wled_opts[] = {
+ {
+ .name = "qcom,current-boost-limit",
+ .val_ptr = &cfg->boost_i_limit,
+ .cfg = &wled4_boost_i_limit_cfg,
+ },
+ {
+ .name = "qcom,current-limit-microamp",
+ .val_ptr = &cfg->string_i_limit,
+ .cfg = &wled4_string_i_limit_cfg,
+ },
+ {
+ .name = "qcom,ovp-millivolt",
+ .val_ptr = &cfg->ovp,
+ .cfg = &pmi8994_wled_ovp_cfg,
+ },
+ {
+ .name = "qcom,switching-freq",
+ .val_ptr = &cfg->switch_freq,
+ .cfg = &wled3_switch_freq_cfg,
+ },
+ };
+
const struct wled_u32_opts wled5_opts[] = {
{
.name = "qcom,current-boost-limit",
@@ -1423,8 +1455,14 @@ static int wled_configure(struct wled *wled)
break;

case 4:
- u32_opts = wled4_opts;
- size = ARRAY_SIZE(wled4_opts);
+ if (of_device_is_compatible(dev->of_node, "qcom,pmi8950-wled") ||
+ of_device_is_compatible(dev->of_node, "qcom,pmi8994-wled")) {
+ u32_opts = pmi8994_wled_opts;
+ size = ARRAY_SIZE(pmi8994_wled_opts);
+ } else {
+ u32_opts = wled4_opts;
+ size = ARRAY_SIZE(wled4_opts);
+ }
*cfg = wled4_config_defaults;
wled->wled_set_brightness = wled4_set_brightness;
wled->wled_sync_toggle = wled3_sync_toggle;
diff --git a/drivers/video/fbdev/au1200fb.c b/drivers/video/fbdev/au1200fb.c
index ed770222660b..685e629e7e16 100644
--- a/drivers/video/fbdev/au1200fb.c
+++ b/drivers/video/fbdev/au1200fb.c
@@ -1724,8 +1724,10 @@ static int au1200fb_drv_probe(struct platform_device *dev)

/* Now hook interrupt too */
irq = platform_get_irq(dev, 0);
- if (irq < 0)
- return irq;
+ if (irq < 0) {
+ ret = irq;
+ goto failed;
+ }

ret = request_irq(irq, au1200fb_handle_irq,
IRQF_SHARED, "lcd", (void *)dev);
diff --git a/drivers/video/of_display_timing.c b/drivers/video/of_display_timing.c
index bebd371c6b93..a4cd446ac5a5 100644
--- a/drivers/video/of_display_timing.c
+++ b/drivers/video/of_display_timing.c
@@ -195,7 +195,7 @@ struct display_timings *of_get_display_timings(const struct device_node *np)
disp->num_timings = 0;
disp->native_mode = 0;

- for_each_child_of_node(timings_np, entry) {
+ for_each_child_of_node_scoped(timings_np, child) {
struct display_timing *dt;
int r;

@@ -206,7 +206,7 @@ struct display_timings *of_get_display_timings(const struct device_node *np)
goto timingfail;
}

- r = of_parse_display_timing(entry, dt);
+ r = of_parse_display_timing(child, dt);
if (r) {
/*
* to not encourage wrong devicetrees, fail in case of
@@ -218,7 +218,7 @@ struct display_timings *of_get_display_timings(const struct device_node *np)
goto timingfail;
}

- if (native_mode == entry)
+ if (native_mode == child)
disp->native_mode = disp->num_timings;

disp->timings[disp->num_timings] = dt;
diff --git a/drivers/watchdog/starfive-wdt.c b/drivers/watchdog/starfive-wdt.c
index ed71d3960a0f..af55adc4a3c6 100644
--- a/drivers/watchdog/starfive-wdt.c
+++ b/drivers/watchdog/starfive-wdt.c
@@ -446,7 +446,7 @@ static int starfive_wdt_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, wdt);
pm_runtime_enable(&pdev->dev);
if (pm_runtime_enabled(&pdev->dev)) {
- ret = pm_runtime_get_sync(&pdev->dev);
+ ret = pm_runtime_resume_and_get(&pdev->dev);
if (ret < 0)
return ret;
} else {
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 49c3f9926394..8c44a25a7d2b 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -724,6 +724,7 @@ static int __init balloon_add_regions(void)
static int __init balloon_init(void)
{
struct task_struct *task;
+ unsigned long current_pages;
int rc;

if (!xen_domain())
@@ -731,12 +732,18 @@ static int __init balloon_init(void)

pr_info("Initialising balloon driver\n");

- if (xen_released_pages >= get_num_physpages()) {
- WARN(1, "Released pages underflow current target");
- return -ERANGE;
+ if (xen_pv_domain()) {
+ if (xen_released_pages >= xen_start_info->nr_pages)
+ goto underflow;
+ current_pages = min(xen_start_info->nr_pages -
+ xen_released_pages, max_pfn);
+ } else {
+ if (xen_unpopulated_pages >= get_num_physpages())
+ goto underflow;
+ current_pages = get_num_physpages() - xen_unpopulated_pages;
}

- balloon_stats.current_pages = get_num_physpages() - xen_released_pages;
+ balloon_stats.current_pages = current_pages;
balloon_stats.target_pages = balloon_stats.current_pages;
balloon_stats.balloon_low = 0;
balloon_stats.balloon_high = 0;
@@ -767,6 +774,10 @@ static int __init balloon_init(void)
xen_balloon_init();

return 0;
+
+ underflow:
+ WARN(1, "Released pages underflow current target");
+ return -ERANGE;
}
subsys_initcall(balloon_init);

diff --git a/drivers/xen/grant-dma-ops.c b/drivers/xen/grant-dma-ops.c
index 29257d2639db..43a918c498c6 100644
--- a/drivers/xen/grant-dma-ops.c
+++ b/drivers/xen/grant-dma-ops.c
@@ -362,7 +362,8 @@ static int xen_grant_init_backend_domid(struct device *dev,
if (np) {
ret = xen_dt_grant_init_backend_domid(dev, np, backend_domid);
of_node_put(np);
- } else if (IS_ENABLED(CONFIG_XEN_VIRTIO_FORCE_GRANT) || xen_pv_domain()) {
+ } else if (!xen_initial_domain() &&
+ (IS_ENABLED(CONFIG_XEN_VIRTIO_FORCE_GRANT) || xen_pv_domain())) {
dev_info(dev, "Using dom0 as backend\n");
*backend_domid = 0;
ret = 0;
diff --git a/drivers/xen/unpopulated-alloc.c b/drivers/xen/unpopulated-alloc.c
index d6fc2aefe264..1dc0b495c8e5 100644
--- a/drivers/xen/unpopulated-alloc.c
+++ b/drivers/xen/unpopulated-alloc.c
@@ -18,6 +18,9 @@ static unsigned int list_count;

static struct resource *target_resource;

+/* Pages to subtract from the memory count when setting balloon target. */
+unsigned long xen_unpopulated_pages __initdata;
+
/*
* If arch is not happy with system "iomem_resource" being used for
* the region allocation it can provide it's own view by creating specific
diff --git a/fs/btrfs/accessors.h b/fs/btrfs/accessors.h
index 99b3ced12805..78721412951c 100644
--- a/fs/btrfs/accessors.h
+++ b/fs/btrfs/accessors.h
@@ -12,6 +12,7 @@
#include <linux/string.h>
#include <linux/mm.h>
#include <uapi/linux/btrfs_tree.h>
+#include "fs.h"
#include "extent_io.h"

struct extent_buffer;
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 21df48e6c4fa..7d8aaa019c8c 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -41,13 +41,17 @@ static bool bbio_has_ordered_extent(const struct btrfs_bio *bbio)
* Initialize a btrfs_bio structure. This skips the embedded bio itself as it
* is already initialized by the block layer.
*/
-void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_fs_info *fs_info,
+void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_inode *inode, u64 file_offset,
btrfs_bio_end_io_t end_io, void *private)
{
+ /* @inode parameter is mandatory. */
+ ASSERT(inode);
+
memset(bbio, 0, offsetof(struct btrfs_bio, bio));
- bbio->fs_info = fs_info;
+ bbio->inode = inode;
bbio->end_io = end_io;
bbio->private = private;
+ bbio->file_offset = file_offset;
atomic_set(&bbio->pending_ios, 1);
WRITE_ONCE(bbio->status, BLK_STS_OK);
}
@@ -60,7 +64,7 @@ void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_fs_info *fs_info,
* a mempool.
*/
struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
- struct btrfs_fs_info *fs_info,
+ struct btrfs_inode *inode, u64 file_offset,
btrfs_bio_end_io_t end_io, void *private)
{
struct btrfs_bio *bbio;
@@ -68,7 +72,7 @@ struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,

bio = bio_alloc_bioset(NULL, nr_vecs, opf, GFP_NOFS, &btrfs_bioset);
bbio = btrfs_bio(bio);
- btrfs_bio_init(bbio, fs_info, end_io, private);
+ btrfs_bio_init(bbio, inode, file_offset, end_io, private);
return bbio;
}

@@ -85,9 +89,7 @@ static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
return ERR_CAST(bio);

bbio = btrfs_bio(bio);
- btrfs_bio_init(bbio, fs_info, NULL, orig_bbio);
- bbio->inode = orig_bbio->inode;
- bbio->file_offset = orig_bbio->file_offset;
+ btrfs_bio_init(bbio, orig_bbio->inode, orig_bbio->file_offset, NULL, orig_bbio);
orig_bbio->file_offset += map_length;
if (bbio_has_ordered_extent(bbio)) {
refcount_inc(&orig_bbio->ordered->refs);
@@ -100,6 +102,12 @@ static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,

void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
{
+ /* Make sure we're already in task context. */
+ ASSERT(in_task());
+
+ if (bbio->async_csum)
+ wait_for_completion(&bbio->csum_done);
+
bbio->bio.bi_status = status;
if (bbio->bio.bi_pool == &btrfs_clone_bioset) {
struct btrfs_bio *orig_bbio = bbio->private;
@@ -244,9 +252,8 @@ static struct btrfs_failed_bio *repair_one_sector(struct btrfs_bio *failed_bbio,
bio_add_folio_nofail(repair_bio, folio, sectorsize, foff);

repair_bbio = btrfs_bio(repair_bio);
- btrfs_bio_init(repair_bbio, fs_info, NULL, fbio);
- repair_bbio->inode = failed_bbio->inode;
- repair_bbio->file_offset = failed_bbio->file_offset + bio_offset;
+ btrfs_bio_init(repair_bbio, failed_bbio->inode, failed_bbio->file_offset + bio_offset,
+ NULL, fbio);

mirror = next_repair_mirror(fbio, failed_bbio->mirror_num);
btrfs_debug(fs_info, "submitting repair read to mirror %d", mirror);
@@ -317,36 +324,35 @@ static struct workqueue_struct *btrfs_end_io_wq(const struct btrfs_fs_info *fs_i
return fs_info->endio_workers;
}

-static void btrfs_end_bio_work(struct work_struct *work)
+static void simple_end_io_work(struct work_struct *work)
{
struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, end_io_work);
+ struct bio *bio = &bbio->bio;

- /* Metadata reads are checked and repaired by the submitter. */
- if (is_data_bbio(bbio))
- btrfs_check_read_bio(bbio, bbio->bio.bi_private);
- else
- btrfs_bio_end_io(bbio, bbio->bio.bi_status);
+ if (bio_op(bio) == REQ_OP_READ) {
+ /* Metadata reads are checked and repaired by the submitter. */
+ if (is_data_bbio(bbio))
+ return btrfs_check_read_bio(bbio, bbio->bio.bi_private);
+ return btrfs_bio_end_io(bbio, bbio->bio.bi_status);
+ }
+ if (bio_is_zone_append(bio) && !bio->bi_status)
+ btrfs_record_physical_zoned(bbio);
+ btrfs_bio_end_io(bbio, bbio->bio.bi_status);
}

static void btrfs_simple_end_io(struct bio *bio)
{
struct btrfs_bio *bbio = btrfs_bio(bio);
struct btrfs_device *dev = bio->bi_private;
- struct btrfs_fs_info *fs_info = bbio->fs_info;
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;

btrfs_bio_counter_dec(fs_info);

if (bio->bi_status)
btrfs_log_dev_io_error(bio, dev);

- if (bio_op(bio) == REQ_OP_READ) {
- INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
- queue_work(btrfs_end_io_wq(fs_info, bio), &bbio->end_io_work);
- } else {
- if (bio_is_zone_append(bio) && !bio->bi_status)
- btrfs_record_physical_zoned(bbio);
- btrfs_bio_end_io(bbio, bbio->bio.bi_status);
- }
+ INIT_WORK(&bbio->end_io_work, simple_end_io_work);
+ queue_work(btrfs_end_io_wq(fs_info, bio), &bbio->end_io_work);
}

static void btrfs_raid56_end_io(struct bio *bio)
@@ -354,6 +360,9 @@ static void btrfs_raid56_end_io(struct bio *bio)
struct btrfs_io_context *bioc = bio->bi_private;
struct btrfs_bio *bbio = btrfs_bio(bio);

+ /* RAID56 endio is always handled in workqueue. */
+ ASSERT(in_task());
+
btrfs_bio_counter_dec(bioc->fs_info);
bbio->mirror_num = bioc->mirror_num;
if (bio_op(bio) == REQ_OP_READ && is_data_bbio(bbio))
@@ -364,11 +373,12 @@ static void btrfs_raid56_end_io(struct bio *bio)
btrfs_put_bioc(bioc);
}

-static void btrfs_orig_write_end_io(struct bio *bio)
+static void orig_write_end_io_work(struct work_struct *work)
{
+ struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, end_io_work);
+ struct bio *bio = &bbio->bio;
struct btrfs_io_stripe *stripe = bio->bi_private;
struct btrfs_io_context *bioc = stripe->bioc;
- struct btrfs_bio *bbio = btrfs_bio(bio);

btrfs_bio_counter_dec(bioc->fs_info);

@@ -393,8 +403,18 @@ static void btrfs_orig_write_end_io(struct bio *bio)
btrfs_put_bioc(bioc);
}

-static void btrfs_clone_write_end_io(struct bio *bio)
+static void btrfs_orig_write_end_io(struct bio *bio)
+{
+ struct btrfs_bio *bbio = btrfs_bio(bio);
+
+ INIT_WORK(&bbio->end_io_work, orig_write_end_io_work);
+ queue_work(btrfs_end_io_wq(bbio->inode->root->fs_info, bio), &bbio->end_io_work);
+}
+
+static void clone_write_end_io_work(struct work_struct *work)
{
+ struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, end_io_work);
+ struct bio *bio = &bbio->bio;
struct btrfs_io_stripe *stripe = bio->bi_private;

if (bio->bi_status) {
@@ -409,8 +429,18 @@ static void btrfs_clone_write_end_io(struct bio *bio)
bio_put(bio);
}

+static void btrfs_clone_write_end_io(struct bio *bio)
+{
+ struct btrfs_bio *bbio = btrfs_bio(bio);
+
+ INIT_WORK(&bbio->end_io_work, clone_write_end_io_work);
+ queue_work(btrfs_end_io_wq(bbio->inode->root->fs_info, bio), &bbio->end_io_work);
+}
+
static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
{
+ u64 physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
+
if (!dev || !dev->bdev ||
test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
(btrfs_op(bio) == BTRFS_MAP_WRITE &&
@@ -425,12 +455,13 @@ static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
* For zone append writing, bi_sector must point the beginning of the
* zone
*/
- if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
- u64 physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
+ if (btrfs_bio(bio)->can_use_append && btrfs_dev_is_sequential(dev, physical)) {
u64 zone_start = round_down(physical, dev->fs_info->zone_size);

ASSERT(btrfs_dev_is_sequential(dev, physical));
bio->bi_iter.bi_sector = zone_start >> SECTOR_SHIFT;
+ bio->bi_opf &= ~REQ_OP_WRITE;
+ bio->bi_opf |= REQ_OP_ZONE_APPEND;
}
btrfs_debug(dev->fs_info,
"%s: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
@@ -455,6 +486,7 @@ static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
{
struct bio *orig_bio = bioc->orig_bio, *bio;
+ struct btrfs_bio *orig_bbio = btrfs_bio(orig_bio);

ASSERT(bio_op(orig_bio) != REQ_OP_READ);

@@ -463,8 +495,11 @@ static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
bio = orig_bio;
bio->bi_end_io = btrfs_orig_write_end_io;
} else {
- bio = bio_alloc_clone(NULL, orig_bio, GFP_NOFS, &fs_bio_set);
+ /* We need to use endio_work to run end_io in task context. */
+ bio = bio_alloc_clone(NULL, orig_bio, GFP_NOFS, &btrfs_bioset);
bio_inc_remaining(orig_bio);
+ btrfs_bio_init(btrfs_bio(bio), orig_bbio->inode,
+ orig_bbio->file_offset, NULL, NULL);
bio->bi_end_io = btrfs_clone_write_end_io;
}

@@ -509,7 +544,11 @@ static int btrfs_bio_csum(struct btrfs_bio *bbio)
{
if (bbio->bio.bi_opf & REQ_META)
return btree_csum_one_bio(bbio);
- return btrfs_csum_one_bio(bbio);
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+ return btrfs_csum_one_bio(bbio, true);
+#else
+ return btrfs_csum_one_bio(bbio, false);
+#endif
}

/*
@@ -581,20 +620,25 @@ static void run_one_async_done(struct btrfs_work *work, bool do_free)

static bool should_async_write(struct btrfs_bio *bbio)
{
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
bool auto_csum_mode = true;

#ifdef CONFIG_BTRFS_EXPERIMENTAL
- struct btrfs_fs_devices *fs_devices = bbio->fs_info->fs_devices;
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
enum btrfs_offload_csum_mode csum_mode = READ_ONCE(fs_devices->offload_csum_mode);

- if (csum_mode == BTRFS_OFFLOAD_CSUM_FORCE_OFF)
- return false;
-
- auto_csum_mode = (csum_mode == BTRFS_OFFLOAD_CSUM_AUTO);
+ if (csum_mode == BTRFS_OFFLOAD_CSUM_FORCE_ON)
+ return true;
+ /*
+ * Write bios calculate the checksum and submit the bio at the same time.
+ * Unless explicitly required, don't offload the serial csum calculation
+ * and bio submission into a workqueue.
+ */
+ return false;
#endif

/* Submit synchronously if the checksum implementation is fast. */
- if (auto_csum_mode && test_bit(BTRFS_FS_CSUM_IMPL_FAST, &bbio->fs_info->flags))
+ if (auto_csum_mode && test_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags))
return false;

/*
@@ -605,7 +649,7 @@ static bool should_async_write(struct btrfs_bio *bbio)
return false;

/* Zoned devices require I/O to be submitted in order. */
- if ((bbio->bio.bi_opf & REQ_META) && btrfs_is_zoned(bbio->fs_info))
+ if ((bbio->bio.bi_opf & REQ_META) && btrfs_is_zoned(fs_info))
return false;

return true;
@@ -620,7 +664,7 @@ static bool btrfs_wq_submit_bio(struct btrfs_bio *bbio,
struct btrfs_io_context *bioc,
struct btrfs_io_stripe *smap, int mirror_num)
{
- struct btrfs_fs_info *fs_info = bbio->fs_info;
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
struct async_submit_bio *async;

async = kmalloc(sizeof(*async), GFP_NOFS);
@@ -639,11 +683,12 @@ static bool btrfs_wq_submit_bio(struct btrfs_bio *bbio,

static u64 btrfs_append_map_length(struct btrfs_bio *bbio, u64 map_length)
{
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
unsigned int nr_segs;
int sector_offset;

- map_length = min(map_length, bbio->fs_info->max_zone_append_size);
- sector_offset = bio_split_rw_at(&bbio->bio, &bbio->fs_info->limits,
+ map_length = min(map_length, fs_info->max_zone_append_size);
+ sector_offset = bio_split_rw_at(&bbio->bio, &fs_info->limits,
&nr_segs, map_length);
if (sector_offset) {
/*
@@ -651,7 +696,7 @@ static u64 btrfs_append_map_length(struct btrfs_bio *bbio, u64 map_length)
* sectorsize and thus cause unaligned I/Os. Fix that by
* always rounding down to the nearest boundary.
*/
- return ALIGN_DOWN(sector_offset << SECTOR_SHIFT, bbio->fs_info->sectorsize);
+ return ALIGN_DOWN(sector_offset << SECTOR_SHIFT, fs_info->sectorsize);
}
return map_length;
}
@@ -659,18 +704,17 @@ static u64 btrfs_append_map_length(struct btrfs_bio *bbio, u64 map_length)
static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
{
struct btrfs_inode *inode = bbio->inode;
- struct btrfs_fs_info *fs_info = bbio->fs_info;
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct bio *bio = &bbio->bio;
u64 logical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
u64 length = bio->bi_iter.bi_size;
u64 map_length = length;
- bool use_append = btrfs_use_zone_append(bbio);
struct btrfs_io_context *bioc = NULL;
struct btrfs_io_stripe smap;
blk_status_t status;
int ret;

- if (!bbio->inode || btrfs_is_data_reloc_root(inode->root))
+ if (bbio->is_scrub || btrfs_is_data_reloc_root(inode->root))
smap.rst_search_commit_root = true;
else
smap.rst_search_commit_root = false;
@@ -684,8 +728,10 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
goto end_bbio;
}

+ bbio->can_use_append = btrfs_use_zone_append(bbio);
+
map_length = min(map_length, length);
- if (use_append)
+ if (bbio->can_use_append)
map_length = btrfs_append_map_length(bbio, map_length);

if (map_length < length) {
@@ -714,11 +760,6 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
}

if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
- if (use_append) {
- bio->bi_opf &= ~REQ_OP_WRITE;
- bio->bi_opf |= REQ_OP_ZONE_APPEND;
- }
-
if (is_data_bbio(bbio) && bioc && bioc->use_rst) {
/*
* No locking for the list update, as we only add to
@@ -734,7 +775,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
* Csum items for reloc roots have already been cloned at this
* point, so they are handled as part of the no-checksum case.
*/
- if (inode && !(inode->flags & BTRFS_INODE_NODATASUM) &&
+ if (!(inode->flags & BTRFS_INODE_NODATASUM) &&
!test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state) &&
!btrfs_is_data_reloc_root(inode->root)) {
if (should_async_write(bbio) &&
@@ -745,7 +786,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
status = errno_to_blk_status(ret);
if (status)
goto fail;
- } else if (use_append ||
+ } else if (bbio->can_use_append ||
(btrfs_is_zoned(fs_info) && inode &&
inode->flags & BTRFS_INODE_NODATASUM)) {
ret = btrfs_alloc_dummy_sum(bbio);
@@ -782,7 +823,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
static void assert_bbio_alignment(struct btrfs_bio *bbio)
{
#ifdef CONFIG_BTRFS_ASSERT
- struct btrfs_fs_info *fs_info = bbio->fs_info;
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
struct bio_vec bvec;
struct bvec_iter iter;
const u32 blocksize = fs_info->sectorsize;
@@ -885,16 +926,16 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
*/
void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_replace)
{
- struct btrfs_fs_info *fs_info = bbio->fs_info;
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
u64 logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
u64 length = bbio->bio.bi_iter.bi_size;
struct btrfs_io_stripe smap = { 0 };
int ret;

- ASSERT(fs_info);
ASSERT(mirror_num > 0);
ASSERT(btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE);
- ASSERT(!bbio->inode);
+ ASSERT(!is_data_inode(bbio->inode));
+ ASSERT(bbio->is_scrub);

btrfs_bio_counter_inc_blocked(fs_info);
ret = btrfs_map_repair_block(fs_info, &smap, logical, length, mirror_num);
diff --git a/fs/btrfs/bio.h b/fs/btrfs/bio.h
index 00883aea55d7..69fe54f564fc 100644
--- a/fs/btrfs/bio.h
+++ b/fs/btrfs/bio.h
@@ -34,7 +34,10 @@ typedef void (*btrfs_bio_end_io_t)(struct btrfs_bio *bbio);
struct btrfs_bio {
/*
* Inode and offset into it that this I/O operates on.
- * Only set for data I/O.
+ *
+ * If the inode is a data one, csum verification and read-repair
+ * will be done automatically.
+ * If the inode is a metadata one, everything is handled by the caller.
*/
struct btrfs_inode *inode;
u64 file_offset;
@@ -60,6 +63,9 @@ struct btrfs_bio {
struct {
struct btrfs_ordered_extent *ordered;
struct btrfs_ordered_sum *sums;
+ struct work_struct csum_work;
+ struct completion csum_done;
+ struct bvec_iter csum_saved_iter;
u64 orig_physical;
};

@@ -76,14 +82,24 @@ struct btrfs_bio {
atomic_t pending_ios;
struct work_struct end_io_work;

- /* File system that this I/O operates on. */
- struct btrfs_fs_info *fs_info;
-
/* Save the first error status of split bio. */
blk_status_t status;

/* Use the commit root to look up csums (data read bio only). */
bool csum_search_commit_root;
+
+ /*
+ * Since scrub will reuse btree inode, we need this flag to distinguish
+ * scrub bios.
+ */
+ bool is_scrub;
+
+ /* Whether the csum generation for data write is async. */
+ bool async_csum;
+
+ /* Whether the bio is written using zone append. */
+ bool can_use_append;
+
/*
* This member must come last, bio_alloc_bioset will allocate enough
* bytes for entire btrfs_bio but relies on bio being last.
@@ -99,10 +115,10 @@ static inline struct btrfs_bio *btrfs_bio(struct bio *bio)
int __init btrfs_bioset_init(void);
void __cold btrfs_bioset_exit(void);

-void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_fs_info *fs_info,
+void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_inode *inode, u64 file_offset,
btrfs_bio_end_io_t end_io, void *private);
struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
- struct btrfs_fs_info *fs_info,
+ struct btrfs_inode *inode, u64 file_offset,
btrfs_bio_end_io_t end_io, void *private);
void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status);

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 8bf501fbcc0b..f7f6d8cb3311 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -3681,6 +3681,14 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
return ret;
}

+static void btrfs_maybe_reset_size_class(struct btrfs_block_group *bg)
+{
+ lockdep_assert_held(&bg->lock);
+ if (btrfs_block_group_should_use_size_class(bg) &&
+ bg->used == 0 && bg->reserved == 0)
+ bg->size_class = BTRFS_BG_SZ_NONE;
+}
+
int btrfs_update_block_group(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, bool alloc)
{
@@ -3745,6 +3753,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
old_val -= num_bytes;
cache->used = old_val;
cache->pinned += num_bytes;
+ btrfs_maybe_reset_size_class(cache);
btrfs_space_info_update_bytes_pinned(space_info, num_bytes);
space_info->bytes_used -= num_bytes;
space_info->disk_used -= num_bytes * factor;
@@ -3836,7 +3845,7 @@ int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
* that happens.
*/
if (num_bytes < ram_bytes)
- btrfs_try_granting_tickets(cache->fs_info, space_info);
+ btrfs_try_granting_tickets(space_info);
out:
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
@@ -3859,22 +3868,26 @@ void btrfs_free_reserved_bytes(struct btrfs_block_group *cache, u64 num_bytes,
bool is_delalloc)
{
struct btrfs_space_info *space_info = cache->space_info;
+ bool bg_ro;

spin_lock(&space_info->lock);
spin_lock(&cache->lock);
- if (cache->ro)
+ bg_ro = cache->ro;
+ cache->reserved -= num_bytes;
+ btrfs_maybe_reset_size_class(cache);
+ if (is_delalloc)
+ cache->delalloc_bytes -= num_bytes;
+ spin_unlock(&cache->lock);
+
+ if (bg_ro)
space_info->bytes_readonly += num_bytes;
else if (btrfs_is_zoned(cache->fs_info))
space_info->bytes_zone_unusable += num_bytes;
- cache->reserved -= num_bytes;
+
space_info->bytes_reserved -= num_bytes;
space_info->max_extent_size = 0;

- if (is_delalloc)
- cache->delalloc_bytes -= num_bytes;
- spin_unlock(&cache->lock);
-
- btrfs_try_granting_tickets(cache->fs_info, space_info);
+ btrfs_try_granting_tickets(space_info);
spin_unlock(&space_info->lock);
}

diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c
index 5ad6de738aee..fc378d2038a2 100644
--- a/fs/btrfs/block-rsv.c
+++ b/fs/btrfs/block-rsv.c
@@ -278,10 +278,11 @@ u64 btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *target = NULL;

/*
- * If we are a delayed block reserve then push to the global rsv,
- * otherwise dump into the global delayed reserve if it is not full.
+ * If we are a delayed refs block reserve then push to the global
+ * reserve, otherwise dump into the global delayed refs reserve if it is
+ * not full.
*/
- if (block_rsv->type == BTRFS_BLOCK_RSV_DELOPS)
+ if (block_rsv->type == BTRFS_BLOCK_RSV_DELREFS)
target = global_rsv;
else if (block_rsv != global_rsv && !btrfs_block_rsv_full(delayed_rsv))
target = delayed_rsv;
@@ -387,7 +388,7 @@ void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info)
num_bytes = block_rsv->reserved - block_rsv->size;
btrfs_space_info_update_bytes_may_use(sinfo, -num_bytes);
block_rsv->reserved = block_rsv->size;
- btrfs_try_granting_tickets(fs_info, sinfo);
+ btrfs_try_granting_tickets(sinfo);
}

block_rsv->full = (block_rsv->reserved == block_rsv->size);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index af373d50a901..a66ca5531b5c 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -18,20 +18,20 @@
#include <linux/lockdep.h>
#include <uapi/linux/btrfs_tree.h>
#include <trace/events/btrfs.h>
+#include "ctree.h"
#include "block-rsv.h"
#include "extent_map.h"
-#include "extent_io.h"
#include "extent-io-tree.h"
-#include "ordered-data.h"
-#include "delayed-inode.h"

-struct extent_state;
struct posix_acl;
struct iov_iter;
struct writeback_control;
struct btrfs_root;
struct btrfs_fs_info;
struct btrfs_trans_handle;
+struct btrfs_bio;
+struct btrfs_file_extent;
+struct btrfs_delayed_node;

/*
* Since we search a directory based on f_pos (struct dir_context::pos) we have
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index bacad18357b3..8c3899832a1a 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -67,9 +67,7 @@ static struct compressed_bio *alloc_compressed_bio(struct btrfs_inode *inode,

bbio = btrfs_bio(bio_alloc_bioset(NULL, BTRFS_MAX_COMPRESSED_PAGES, op,
GFP_NOFS, &btrfs_compressed_bioset));
- btrfs_bio_init(bbio, inode->root->fs_info, end_io, NULL);
- bbio->inode = inode;
- bbio->file_offset = start;
+ btrfs_bio_init(bbio, inode, start, end_io, NULL);
return to_compressed_bio(bbio);
}

@@ -354,7 +352,7 @@ static void end_bbio_compressed_write(struct btrfs_bio *bbio)

static void btrfs_add_compressed_bio_folios(struct compressed_bio *cb)
{
- struct btrfs_fs_info *fs_info = cb->bbio.fs_info;
+ struct btrfs_fs_info *fs_info = cb->bbio.inode->root->fs_info;
struct bio *bio = &cb->bbio.bio;
u32 offset = 0;

diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index eba188a9e3bb..062ebd9c2d32 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -14,14 +14,12 @@
#include <linux/pagemap.h>
#include "bio.h"
#include "fs.h"
-#include "messages.h"
+#include "btrfs_inode.h"

struct address_space;
-struct page;
struct inode;
struct btrfs_inode;
struct btrfs_ordered_extent;
-struct btrfs_bio;

/*
* We want to make sure that amount of RAM required to uncompress an extent is
@@ -77,7 +75,7 @@ struct compressed_bio {

static inline struct btrfs_fs_info *cb_to_fs_info(const struct compressed_bio *cb)
{
- return cb->bbio.fs_info;
+ return cb->bbio.inode->root->fs_info;
}

/* @range_end must be exclusive. */
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index fe70b593c7cd..16dd11c48531 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -17,9 +17,7 @@
#include <linux/refcount.h>
#include <uapi/linux/btrfs_tree.h>
#include "locking.h"
-#include "fs.h"
#include "accessors.h"
-#include "extent-io-tree.h"

struct extent_buffer;
struct btrfs_block_rsv;
diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
index 7b277934f66f..a4cc1bc63562 100644
--- a/fs/btrfs/defrag.c
+++ b/fs/btrfs/defrag.c
@@ -15,6 +15,7 @@
#include "defrag.h"
#include "file-item.h"
#include "super.h"
+#include "compression.h"

static struct kmem_cache *btrfs_inode_defrag_cachep;

diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 69863e398e22..77e1bcb2a74b 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -9,6 +9,7 @@
#include "transaction.h"
#include "accessors.h"
#include "dir-item.h"
+#include "delayed-inode.h"

/*
* insert a name into a directory, doing overflow properly if there is a hash
diff --git a/fs/btrfs/direct-io.c b/fs/btrfs/direct-io.c
index 802d4dbe5b38..e29ea28ce90b 100644
--- a/fs/btrfs/direct-io.c
+++ b/fs/btrfs/direct-io.c
@@ -10,6 +10,8 @@
#include "fs.h"
#include "transaction.h"
#include "volumes.h"
+#include "bio.h"
+#include "ordered-data.h"

struct btrfs_dio_data {
ssize_t submitted;
@@ -713,10 +715,8 @@ static void btrfs_dio_submit_io(const struct iomap_iter *iter, struct bio *bio,
container_of(bbio, struct btrfs_dio_private, bbio);
struct btrfs_dio_data *dio_data = iter->private;

- btrfs_bio_init(bbio, BTRFS_I(iter->inode)->root->fs_info,
+ btrfs_bio_init(bbio, BTRFS_I(iter->inode), file_offset,
btrfs_dio_end_io, bio->bi_private);
- bbio->inode = BTRFS_I(iter->inode);
- bbio->file_offset = file_offset;

dip->file_offset = file_offset;
dip->bytes = bio->bi_iter.bi_size;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 745ae698bbc8..3fd5d6a27d4c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -50,6 +50,7 @@
#include "relocation.h"
#include "scrub.h"
#include "super.h"
+#include "delayed-inode.h"

#define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN |\
BTRFS_HEADER_FLAG_RELOC |\
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 57920f2c6fe4..5320da83d0cf 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -9,7 +9,8 @@
#include <linux/sizes.h>
#include <linux/compiler_types.h>
#include "ctree.h"
-#include "fs.h"
+#include "bio.h"
+#include "ordered-data.h"

struct block_device;
struct super_block;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index dc4ca98c3780..01337e3f2879 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -40,6 +40,7 @@
#include "orphan.h"
#include "tree-checker.h"
#include "raid-stripe-tree.h"
+#include "delayed-inode.h"

#undef SCRAMBLE_DELAYED_REFS

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index e6ffa12f5753..c3524401ff03 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -518,7 +518,7 @@ static void end_folio_read(struct folio *folio, bool uptodate, u64 start, u32 le
*/
static void end_bbio_data_write(struct btrfs_bio *bbio)
{
- struct btrfs_fs_info *fs_info = bbio->fs_info;
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
struct bio *bio = &bbio->bio;
int error = blk_status_to_errno(bio->bi_status);
struct folio_iter fi;
@@ -574,7 +574,7 @@ static void begin_folio_read(struct btrfs_fs_info *fs_info, struct folio *folio)
*/
static void end_bbio_data_read(struct btrfs_bio *bbio)
{
- struct btrfs_fs_info *fs_info = bbio->fs_info;
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
struct bio *bio = &bbio->bio;
struct folio_iter fi;

@@ -739,12 +739,10 @@ static void alloc_new_bio(struct btrfs_inode *inode,
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_bio *bbio;

- bbio = btrfs_bio_alloc(BIO_MAX_VECS, bio_ctrl->opf, fs_info,
- bio_ctrl->end_io_func, NULL);
+ bbio = btrfs_bio_alloc(BIO_MAX_VECS, bio_ctrl->opf, inode,
+ file_offset, bio_ctrl->end_io_func, NULL);
bbio->bio.bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
bbio->bio.bi_write_hint = inode->vfs_inode.i_write_hint;
- bbio->inode = inode;
- bbio->file_offset = file_offset;
bio_ctrl->bbio = bbio;
bio_ctrl->len_to_oe_boundary = U32_MAX;
bio_ctrl->next_file_offset = file_offset;
@@ -2223,12 +2221,11 @@ static noinline_for_stack void write_one_eb(struct extent_buffer *eb,

bbio = btrfs_bio_alloc(INLINE_EXTENT_BUFFER_PAGES,
REQ_OP_WRITE | REQ_META | wbc_to_write_flags(wbc),
- eb->fs_info, end_bbio_meta_write, eb);
+ BTRFS_I(fs_info->btree_inode), eb->start,
+ end_bbio_meta_write, eb);
bbio->bio.bi_iter.bi_sector = eb->start >> SECTOR_SHIFT;
bio_set_dev(&bbio->bio, fs_info->fs_devices->latest_dev->bdev);
wbc_init_bio(wbc, &bbio->bio);
- bbio->inode = BTRFS_I(eb->fs_info->btree_inode);
- bbio->file_offset = eb->start;
for (int i = 0; i < num_extent_folios(eb); i++) {
struct folio *folio = eb->folios[i];
u64 range_start = max_t(u64, eb->start, folio_pos(folio));
@@ -3842,6 +3839,7 @@ static void end_bbio_meta_read(struct btrfs_bio *bbio)
int read_extent_buffer_pages_nowait(struct extent_buffer *eb, int mirror_num,
const struct btrfs_tree_parent_check *check)
{
+ struct btrfs_fs_info *fs_info = eb->fs_info;
struct btrfs_bio *bbio;

if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
@@ -3875,11 +3873,9 @@ int read_extent_buffer_pages_nowait(struct extent_buffer *eb, int mirror_num,
refcount_inc(&eb->refs);

bbio = btrfs_bio_alloc(INLINE_EXTENT_BUFFER_PAGES,
- REQ_OP_READ | REQ_META, eb->fs_info,
- end_bbio_meta_read, eb);
+ REQ_OP_READ | REQ_META, BTRFS_I(fs_info->btree_inode),
+ eb->start, end_bbio_meta_read, eb);
bbio->bio.bi_iter.bi_sector = eb->start >> SECTOR_SHIFT;
- bbio->inode = BTRFS_I(eb->fs_info->btree_inode);
- bbio->file_offset = eb->start;
memcpy(&bbio->parent_check, check, sizeof(*check));
for (int i = 0; i < num_extent_folios(eb); i++) {
struct folio *folio = eb->folios[i];
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 559bec44a7a8..73571d5d3d5a 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -12,7 +12,6 @@
#include <linux/rwsem.h>
#include <linux/list.h>
#include <linux/slab.h>
-#include "compression.h"
#include "messages.h"
#include "ulist.h"
#include "misc.h"
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index d4b81ee4d97b..6f685f3c9327 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -8,8 +8,7 @@
#include <linux/rbtree.h>
#include <linux/list.h>
#include <linux/refcount.h>
-#include "misc.h"
-#include "compression.h"
+#include "fs.h"

struct btrfs_inode;
struct btrfs_fs_info;
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index a42e6d54e7cd..4b7c40f05e8f 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -18,6 +18,7 @@
#include "fs.h"
#include "accessors.h"
#include "file-item.h"
+#include "volumes.h"

#define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \
sizeof(struct btrfs_item) * 2) / \
@@ -764,21 +765,46 @@ int btrfs_lookup_csums_bitmap(struct btrfs_root *root, struct btrfs_path *path,
return ret;
}

+static void csum_one_bio(struct btrfs_bio *bbio, struct bvec_iter *src)
+{
+ struct btrfs_inode *inode = bbio->inode;
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
+ struct bio *bio = &bbio->bio;
+ struct btrfs_ordered_sum *sums = bbio->sums;
+ struct bvec_iter iter = *src;
+ phys_addr_t paddr;
+ const u32 blocksize = fs_info->sectorsize;
+ int index = 0;
+
+ shash->tfm = fs_info->csum_shash;
+
+ btrfs_bio_for_each_block(paddr, bio, &iter, blocksize) {
+ btrfs_calculate_block_csum(fs_info, paddr, sums->sums + index);
+ index += fs_info->csum_size;
+ }
+}
+
+static void csum_one_bio_work(struct work_struct *work)
+{
+ struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, csum_work);
+
+ ASSERT(btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE);
+ ASSERT(bbio->async_csum == true);
+ csum_one_bio(bbio, &bbio->csum_saved_iter);
+ complete(&bbio->csum_done);
+}
+
/*
* Calculate checksums of the data contained inside a bio.
*/
-int btrfs_csum_one_bio(struct btrfs_bio *bbio)
+int btrfs_csum_one_bio(struct btrfs_bio *bbio, bool async)
{
struct btrfs_ordered_extent *ordered = bbio->ordered;
struct btrfs_inode *inode = bbio->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
- SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
struct bio *bio = &bbio->bio;
struct btrfs_ordered_sum *sums;
- struct bvec_iter iter = bio->bi_iter;
- phys_addr_t paddr;
- const u32 blocksize = fs_info->sectorsize;
- int index;
unsigned nofs_flag;

nofs_flag = memalloc_nofs_save();
@@ -789,21 +815,21 @@ int btrfs_csum_one_bio(struct btrfs_bio *bbio)
if (!sums)
return -ENOMEM;

+ sums->logical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
sums->len = bio->bi_iter.bi_size;
INIT_LIST_HEAD(&sums->list);
-
- sums->logical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
- index = 0;
-
- shash->tfm = fs_info->csum_shash;
-
- btrfs_bio_for_each_block(paddr, bio, &iter, blocksize) {
- btrfs_calculate_block_csum(fs_info, paddr, sums->sums + index);
- index += fs_info->csum_size;
- }
-
bbio->sums = sums;
btrfs_add_ordered_sum(ordered, sums);
+
+ if (!async) {
+ csum_one_bio(bbio, &bbio->bio.bi_iter);
+ return 0;
+ }
+ init_completion(&bbio->csum_done);
+ bbio->async_csum = true;
+ bbio->csum_saved_iter = bbio->bio.bi_iter;
+ INIT_WORK(&bbio->csum_work, csum_one_bio_work);
+ schedule_work(&bbio->csum_work);
return 0;
}

diff --git a/fs/btrfs/file-item.h b/fs/btrfs/file-item.h
index 63216c43676d..5645c5e3abdb 100644
--- a/fs/btrfs/file-item.h
+++ b/fs/btrfs/file-item.h
@@ -7,7 +7,7 @@
#include <linux/list.h>
#include <uapi/linux/btrfs_tree.h>
#include "ctree.h"
-#include "accessors.h"
+#include "ordered-data.h"

struct extent_map;
struct btrfs_file_extent_item;
@@ -64,7 +64,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_ordered_sum *sums);
-int btrfs_csum_one_bio(struct btrfs_bio *bbio);
+int btrfs_csum_one_bio(struct btrfs_bio *bbio, bool async);
int btrfs_alloc_dummy_sum(struct btrfs_bio *bbio);
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 76a66c74249a..47e762856521 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -71,6 +71,7 @@
#include "backref.h"
#include "raid-stripe-tree.h"
#include "fiemap.h"
+#include "delayed-inode.h"

#define COW_FILE_RANGE_KEEP_LOCKED (1UL << 0)
#define COW_FILE_RANGE_NO_INLINE (1UL << 1)
@@ -9422,7 +9423,6 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
u64 disk_bytenr, u64 disk_io_size,
struct page **pages, void *uring_ctx)
{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_encoded_read_private *priv, sync_priv;
struct completion sync_reads;
unsigned long i = 0;
@@ -9447,10 +9447,9 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
priv->status = 0;
priv->uring_ctx = uring_ctx;

- bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info,
+ bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, inode, 0,
btrfs_encoded_read_endio, priv);
bbio->bio.bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
- bbio->inode = inode;

do {
size_t bytes = min_t(u64, disk_io_size, PAGE_SIZE);
@@ -9459,10 +9458,9 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
refcount_inc(&priv->pending_refs);
btrfs_submit_bbio(bbio, 0);

- bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info,
+ bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, inode, 0,
btrfs_encoded_read_endio, priv);
bbio->bio.bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
- bbio->inode = inode;
continue;
}

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index febc22d1b648..302bb3ecf39a 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1157,11 +1157,14 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
}
if (ret > 0) {
/*
- * Shouldn't happen, but in case it does we
- * don't need to do the btrfs_next_item, just
- * continue.
+ * Shouldn't happen because the key should still
+ * be there (return 0), but in case it does it
+ * means we have reached the end of the tree -
+ * there are no more leaves with items that have
+ * a key greater than or equal to @found_key,
+ * so just stop the search loop.
*/
- continue;
+ break;
}
}
ret = btrfs_next_item(tree_root, path);
@@ -1627,8 +1630,10 @@ static int __del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
if (ret < 0 && ret != -ENOENT)
goto out;
ret2 = del_qgroup_relation_item(trans, dst, src);
- if (ret2 < 0 && ret2 != -ENOENT)
+ if (ret2 < 0 && ret2 != -ENOENT) {
+ ret = ret2;
goto out;
+ }

/* At least one deletion succeeded, return 0 */
if (!ret || !ret2)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index b6a7ea105eb1..747e2c748376 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -929,10 +929,11 @@ static int calc_next_mirror(int mirror, int num_copies)
static void scrub_bio_add_sector(struct btrfs_bio *bbio, struct scrub_stripe *stripe,
int sector_nr)
{
+ struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
void *kaddr = scrub_stripe_get_kaddr(stripe, sector_nr);
int ret;

- ret = bio_add_page(&bbio->bio, virt_to_page(kaddr), bbio->fs_info->sectorsize,
+ ret = bio_add_page(&bbio->bio, virt_to_page(kaddr), fs_info->sectorsize,
offset_in_page(kaddr));
/*
* Caller should ensure the bbio has enough size.
@@ -942,7 +943,21 @@ static void scrub_bio_add_sector(struct btrfs_bio *bbio, struct scrub_stripe *st
* to create the minimal amount of bio vectors, for fs block size < page
* size cases.
*/
- ASSERT(ret == bbio->fs_info->sectorsize);
+ ASSERT(ret == fs_info->sectorsize);
+}
+
+static struct btrfs_bio *alloc_scrub_bbio(struct btrfs_fs_info *fs_info,
+ unsigned int nr_vecs, blk_opf_t opf,
+ u64 logical,
+ btrfs_bio_end_io_t end_io, void *private)
+{
+ struct btrfs_bio *bbio;
+
+ bbio = btrfs_bio_alloc(nr_vecs, opf, BTRFS_I(fs_info->btree_inode),
+ logical, end_io, private);
+ bbio->is_scrub = true;
+ bbio->bio.bi_iter.bi_sector = logical >> SECTOR_SHIFT;
+ return bbio;
}

static void scrub_stripe_submit_repair_read(struct scrub_stripe *stripe,
@@ -968,12 +983,10 @@ static void scrub_stripe_submit_repair_read(struct scrub_stripe *stripe,
bbio = NULL;
}

- if (!bbio) {
- bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ,
- fs_info, scrub_repair_read_endio, stripe);
- bbio->bio.bi_iter.bi_sector = (stripe->logical +
- (i << fs_info->sectorsize_bits)) >> SECTOR_SHIFT;
- }
+ if (!bbio)
+ bbio = alloc_scrub_bbio(fs_info, stripe->nr_sectors, REQ_OP_READ,
+ stripe->logical + (i << fs_info->sectorsize_bits),
+ scrub_repair_read_endio, stripe);

scrub_bio_add_sector(bbio, stripe, i);
}
@@ -1352,13 +1365,10 @@ static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *str
scrub_submit_write_bio(sctx, stripe, bbio, dev_replace);
bbio = NULL;
}
- if (!bbio) {
- bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_WRITE,
- fs_info, scrub_write_endio, stripe);
- bbio->bio.bi_iter.bi_sector = (stripe->logical +
- (sector_nr << fs_info->sectorsize_bits)) >>
- SECTOR_SHIFT;
- }
+ if (!bbio)
+ bbio = alloc_scrub_bbio(fs_info, stripe->nr_sectors, REQ_OP_WRITE,
+ stripe->logical + (sector_nr << fs_info->sectorsize_bits),
+ scrub_write_endio, stripe);
scrub_bio_add_sector(bbio, stripe, sector_nr);
}
if (bbio)
@@ -1849,9 +1859,8 @@ static void scrub_submit_extent_sector_read(struct scrub_stripe *stripe)
continue;
}

- bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ,
- fs_info, scrub_read_endio, stripe);
- bbio->bio.bi_iter.bi_sector = logical >> SECTOR_SHIFT;
+ bbio = alloc_scrub_bbio(fs_info, stripe->nr_sectors, REQ_OP_READ,
+ logical, scrub_read_endio, stripe);
}

scrub_bio_add_sector(bbio, stripe, i);
@@ -1888,10 +1897,8 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx,
return;
}

- bbio = btrfs_bio_alloc(BTRFS_STRIPE_LEN >> min_folio_shift, REQ_OP_READ, fs_info,
- scrub_read_endio, stripe);
-
- bbio->bio.bi_iter.bi_sector = stripe->logical >> SECTOR_SHIFT;
+ bbio = alloc_scrub_bbio(fs_info, BTRFS_STRIPE_LEN >> min_folio_shift, REQ_OP_READ,
+ stripe->logical, scrub_read_endio, stripe);
/* Read the whole range inside the chunk boundary. */
for (unsigned int cur = 0; cur < nr_sectors; cur++)
scrub_bio_add_sector(bbio, stripe, cur);
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index a6f94e9f5591..474ed47095ba 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -15,6 +15,7 @@
#include "accessors.h"
#include "extent-tree.h"
#include "zoned.h"
+#include "delayed-inode.h"

/*
* HOW DOES SPACE RESERVATION WORK
@@ -377,7 +378,7 @@ void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
btrfs_space_info_update_bytes_zone_unusable(space_info, block_group->zone_unusable);
if (block_group->length > 0)
space_info->full = false;
- btrfs_try_granting_tickets(info, space_info);
+ btrfs_try_granting_tickets(space_info);
spin_unlock(&space_info->lock);

block_group->space_info = space_info;
@@ -527,9 +528,9 @@ static void remove_ticket(struct btrfs_space_info *space_info,
* This is for space we already have accounted in space_info->bytes_may_use, so
* basically when we're returning space from block_rsv's.
*/
-void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info)
+void btrfs_try_granting_tickets(struct btrfs_space_info *space_info)
{
+ struct btrfs_fs_info *fs_info = space_info->fs_info;
struct list_head *head;
enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;

@@ -1128,7 +1129,7 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
* the list.
*/
if (!aborted)
- btrfs_try_granting_tickets(fs_info, space_info);
+ btrfs_try_granting_tickets(space_info);
}
return (tickets_id != space_info->tickets_id);
}
@@ -1548,7 +1549,7 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
* ticket in front of a smaller ticket that can now be satisfied with
* the available space.
*/
- btrfs_try_granting_tickets(fs_info, space_info);
+ btrfs_try_granting_tickets(space_info);
spin_unlock(&space_info->lock);
}

@@ -1576,7 +1577,7 @@ static void priority_reclaim_data_space(struct btrfs_fs_info *fs_info,

ticket->error = -ENOSPC;
remove_ticket(space_info, ticket);
- btrfs_try_granting_tickets(fs_info, space_info);
+ btrfs_try_granting_tickets(space_info);
spin_unlock(&space_info->lock);
}

@@ -2199,5 +2200,5 @@ void btrfs_return_free_space(struct btrfs_space_info *space_info, u64 len)
grant:
/* Add to any tickets we may have. */
if (len)
- btrfs_try_granting_tickets(fs_info, space_info);
+ btrfs_try_granting_tickets(space_info);
}
diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
index a846f63585c9..596a1e923ddf 100644
--- a/fs/btrfs/space-info.h
+++ b/fs/btrfs/space-info.h
@@ -283,8 +283,7 @@ int btrfs_reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info,
u64 orig_bytes,
enum btrfs_reserve_flush_enum flush);
-void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info);
+void btrfs_try_granting_tickets(struct btrfs_space_info *space_info);
int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
const struct btrfs_space_info *space_info, u64 bytes,
enum btrfs_reserve_flush_enum flush);
@@ -295,7 +294,7 @@ static inline void btrfs_space_info_free_bytes_may_use(
{
spin_lock(&space_info->lock);
btrfs_space_info_update_bytes_may_use(space_info, -num_bytes);
- btrfs_try_granting_tickets(space_info->fs_info, space_info);
+ btrfs_try_granting_tickets(space_info);
spin_unlock(&space_info->lock);
}
int btrfs_reserve_data_bytes(struct btrfs_space_info *space_info, u64 bytes,
diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h
index ad0552db7c7d..d81a0ade559f 100644
--- a/fs/btrfs/subpage.h
+++ b/fs/btrfs/subpage.h
@@ -7,7 +7,6 @@
#include <linux/atomic.h>
#include <linux/sizes.h>
#include "btrfs_inode.h"
-#include "fs.h"

struct address_space;
struct folio;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index c457316c2788..089712b15d60 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -32,6 +32,8 @@
#include "ioctl.h"
#include "relocation.h"
#include "scrub.h"
+#include "ordered-data.h"
+#include "delayed-inode.h"

static struct kmem_cache *btrfs_trans_handle_cachep;

@@ -724,7 +726,7 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,

h->type = type;
INIT_LIST_HEAD(&h->new_bgs);
- btrfs_init_metadata_block_rsv(fs_info, &h->delayed_rsv, BTRFS_BLOCK_RSV_DELOPS);
+ btrfs_init_metadata_block_rsv(fs_info, &h->delayed_rsv, BTRFS_BLOCK_RSV_DELREFS);

smp_mb();
if (cur_trans->state >= TRANS_STATE_COMMIT_START &&
@@ -2482,13 +2484,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
list_add_tail(&fs_info->chunk_root->dirty_list,
&cur_trans->switch_commits);

- if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
- btrfs_set_root_node(&fs_info->block_group_root->root_item,
- fs_info->block_group_root->node);
- list_add_tail(&fs_info->block_group_root->dirty_list,
- &cur_trans->switch_commits);
- }
-
switch_commit_roots(trans);

ASSERT(list_empty(&cur_trans->dirty_bgs));
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 9f7c777af635..18ef069197e5 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -14,10 +14,6 @@
#include <linux/wait.h>
#include "btrfs_inode.h"
#include "delayed-ref.h"
-#include "extent-io-tree.h"
-#include "block-rsv.h"
-#include "messages.h"
-#include "misc.h"

struct dentry;
struct inode;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index ae2e035d013e..6c5db73c3e85 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -29,6 +29,7 @@
#include "orphan.h"
#include "print-tree.h"
#include "tree-checker.h"
+#include "delayed-inode.h"

#define MAX_CONFLICT_INODES 10

diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index dc313e6bb2fa..4f149d7d4fde 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -8,8 +8,7 @@

#include <linux/list.h>
#include <linux/fs.h>
-#include "messages.h"
-#include "ctree.h"
+#include <linux/fscrypt.h>
#include "transaction.h"

struct inode;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8e7dcb12af4c..645bf98a9571 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1506,30 +1506,158 @@ struct btrfs_device *btrfs_scan_one_device(const char *path,
}

/*
- * Try to find a chunk that intersects [start, start + len] range and when one
- * such is found, record the end of it in *start
+ * Find the first pending extent intersecting a range.
+ *
+ * @device: the device to search
+ * @start: start of the range to check
+ * @len: length of the range to check
+ * @pending_start: output pointer for the start of the found pending extent
+ * @pending_end: output pointer for the end of the found pending extent (inclusive)
+ *
+ * Search for a pending chunk allocation that intersects the half-open range
+ * [start, start + len).
+ *
+ * Return: true if a pending extent was found, false otherwise.
+ * If the return value is true, store the first pending extent in
+ * [*pending_start, *pending_end]. Otherwise, the two output variables
+ * may still be modified, to something outside the range and should not
+ * be used.
*/
-static bool contains_pending_extent(struct btrfs_device *device, u64 *start,
- u64 len)
+static bool first_pending_extent(struct btrfs_device *device, u64 start, u64 len,
+ u64 *pending_start, u64 *pending_end)
{
- u64 physical_start, physical_end;
-
lockdep_assert_held(&device->fs_info->chunk_mutex);

- if (btrfs_find_first_extent_bit(&device->alloc_state, *start,
- &physical_start, &physical_end,
+ if (btrfs_find_first_extent_bit(&device->alloc_state, start,
+ pending_start, pending_end,
CHUNK_ALLOCATED, NULL)) {

- if (in_range(physical_start, *start, len) ||
- in_range(*start, physical_start,
- physical_end + 1 - physical_start)) {
- *start = physical_end + 1;
+ if (in_range(*pending_start, start, len) ||
+ in_range(start, *pending_start, *pending_end + 1 - *pending_start)) {
return true;
}
}
return false;
}

+/*
+ * Find the first real hole accounting for pending extents.
+ *
+ * @device: the device containing the candidate hole
+ * @start: input/output pointer for the hole start position
+ * @len: input/output pointer for the hole length
+ * @min_hole_size: the size of hole we are looking for
+ *
+ * Given a potential hole specified by [*start, *start + *len), check for pending
+ * chunk allocations within that range. If pending extents are found, the hole is
+ * adjusted to represent the first true free space that is large enough when
+ * accounting for pending chunks.
+ *
+ * Note that this function must handle various cases involving non-consecutive
+ * pending extents.
+ *
+ * Returns: true if a suitable hole was found and false otherwise.
+ * If the return value is true, then *start and *len are set to represent the hole.
+ * If the return value is false, then *start is set to the largest hole we
+ * found and *len is set to its length.
+ * If there are no holes at all, then *start is set to the end of the range and
+ * *len is set to 0.
+ */
+static bool find_hole_in_pending_extents(struct btrfs_device *device, u64 *start,
+ u64 *len, u64 min_hole_size)
+{
+ u64 pending_start, pending_end;
+ u64 end;
+ u64 max_hole_start = 0;
+ u64 max_hole_len = 0;
+
+ lockdep_assert_held(&device->fs_info->chunk_mutex);
+
+ if (*len == 0)
+ return false;
+
+ end = *start + *len - 1;
+
+ /*
+ * Loop until we either see a large enough hole or check every pending
+ * extent overlapping the candidate hole.
+ * At every hole that we observe, record it if it is the new max.
+ * At the end of the iteration, set the output variables to the max hole.
+ */
+ while (true) {
+ if (first_pending_extent(device, *start, *len, &pending_start, &pending_end)) {
+ /*
+ * Case 1: the pending extent overlaps the start of
+ * candidate hole. That means the true hole is after the
+ * pending extent, but we need to find the next pending
+ * extent to properly size the hole. In the next loop,
+ * we will reduce to case 2 or 3.
+ * e.g.,
+ *
+ * |----pending A----| real hole |----pending B----|
+ * | candidate hole |
+ * *start end
+ */
+ if (pending_start <= *start) {
+ *start = pending_end + 1;
+ goto next;
+ }
+ /*
+ * Case 2: The pending extent starts after *start (and overlaps
+ * [*start, end)), so the first hole just goes up to the start
+ * of the pending extent.
+ * e.g.,
+ *
+ * | real hole |----pending A----|
+ * | candidate hole |
+ * *start end
+ */
+ *len = pending_start - *start;
+ if (*len > max_hole_len) {
+ max_hole_start = *start;
+ max_hole_len = *len;
+ }
+ if (*len >= min_hole_size)
+ break;
+ /*
+ * If the hole wasn't big enough, then we advance past
+ * the pending extent and keep looking.
+ */
+ *start = pending_end + 1;
+ goto next;
+ } else {
+ /*
+ * Case 3: There is no pending extent overlapping the
+ * range [*start, *start + *len - 1], so the only remaining
+ * hole is the remaining range.
+ * e.g.,
+ *
+ * | candidate hole |
+ * | real hole |
+ * *start end
+ */
+
+ if (*len > max_hole_len) {
+ max_hole_start = *start;
+ max_hole_len = *len;
+ }
+ break;
+ }
+next:
+ if (*start > end)
+ break;
+ *len = end - *start + 1;
+ }
+ if (max_hole_len) {
+ *start = max_hole_start;
+ *len = max_hole_len;
+ } else {
+ *start = end + 1;
+ *len = 0;
+ }
+ return max_hole_len >= min_hole_size;
+}
+
static u64 dev_extent_search_start(struct btrfs_device *device)
{
switch (device->fs_devices->chunk_alloc_policy) {
@@ -1594,59 +1722,57 @@ static bool dev_extent_hole_check_zoned(struct btrfs_device *device,
}

/*
- * Check if specified hole is suitable for allocation.
+ * Validate and adjust a hole for chunk allocation
+ *
+ * @device: the device containing the candidate hole
+ * @hole_start: input/output pointer for the hole start position
+ * @hole_size: input/output pointer for the hole size
+ * @num_bytes: minimum allocation size required
*
- * @device: the device which we have the hole
- * @hole_start: starting position of the hole
- * @hole_size: the size of the hole
- * @num_bytes: the size of the free space that we need
+ * Check if the specified hole is suitable for allocation and adjust it if
+ * necessary. The hole may be modified to skip over pending chunk allocations
+ * and to satisfy stricter zoned requirements on zoned filesystems.
*
- * This function may modify @hole_start and @hole_size to reflect the suitable
- * position for allocation. Returns 1 if hole position is updated, 0 otherwise.
+ * For regular (non-zoned) allocation, if the hole after adjustment is smaller
+ * than @num_bytes, the search continues past additional pending extents until
+ * either a sufficiently large hole is found or no more pending extents exist.
+ *
+ * Return: true if a suitable hole was found and false otherwise.
+ * If the return value is true, then *hole_start and *hole_size are set to
+ * represent the hole we found.
+ * If the return value is false, then *hole_start is set to the largest
+ * hole we found and *hole_size is set to its length.
+ * If there are no holes at all, then *hole_start is set to the end of the range
+ * and *hole_size is set to 0.
*/
static bool dev_extent_hole_check(struct btrfs_device *device, u64 *hole_start,
u64 *hole_size, u64 num_bytes)
{
- bool changed = false;
- u64 hole_end = *hole_start + *hole_size;
+ bool found = false;
+ const u64 hole_end = *hole_start + *hole_size - 1;

- for (;;) {
- /*
- * Check before we set max_hole_start, otherwise we could end up
- * sending back this offset anyway.
- */
- if (contains_pending_extent(device, hole_start, *hole_size)) {
- if (hole_end >= *hole_start)
- *hole_size = hole_end - *hole_start;
- else
- *hole_size = 0;
- changed = true;
- }
+ ASSERT(*hole_size > 0);

- switch (device->fs_devices->chunk_alloc_policy) {
- default:
- btrfs_warn_unknown_chunk_allocation(device->fs_devices->chunk_alloc_policy);
- fallthrough;
- case BTRFS_CHUNK_ALLOC_REGULAR:
- /* No extra check */
- break;
- case BTRFS_CHUNK_ALLOC_ZONED:
- if (dev_extent_hole_check_zoned(device, hole_start,
- hole_size, num_bytes)) {
- changed = true;
- /*
- * The changed hole can contain pending extent.
- * Loop again to check that.
- */
- continue;
- }
- break;
- }
+again:
+ *hole_size = hole_end - *hole_start + 1;
+ found = find_hole_in_pending_extents(device, hole_start, hole_size, num_bytes);
+ if (!found)
+ return found;
+ ASSERT(*hole_size >= num_bytes);

+ switch (device->fs_devices->chunk_alloc_policy) {
+ default:
+ btrfs_warn_unknown_chunk_allocation(device->fs_devices->chunk_alloc_policy);
+ fallthrough;
+ case BTRFS_CHUNK_ALLOC_REGULAR:
+ return found;
+ case BTRFS_CHUNK_ALLOC_ZONED:
+ if (dev_extent_hole_check_zoned(device, hole_start, hole_size, num_bytes))
+ goto again;
break;
}

- return changed;
+ return found;
}

/*
@@ -1705,7 +1831,7 @@ static int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
ret = -ENOMEM;
goto out;
}
-again:
+
if (search_start >= search_end ||
test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
ret = -ENOSPC;
@@ -1792,11 +1918,7 @@ static int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
*/
if (search_end > search_start) {
hole_size = search_end - search_start;
- if (dev_extent_hole_check(device, &search_start, &hole_size,
- num_bytes)) {
- btrfs_release_path(path);
- goto again;
- }
+ dev_extent_hole_check(device, &search_start, &hole_size, num_bytes);

if (hole_size > max_hole_size) {
max_hole_start = search_start;
@@ -4882,6 +5004,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
u64 diff;
u64 start;
u64 free_diff = 0;
+ u64 pending_start, pending_end;

new_size = round_down(new_size, fs_info->sectorsize);
start = new_size;
@@ -4927,7 +5050,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
* in-memory chunks are synced to disk so that the loop below sees them
* and relocates them accordingly.
*/
- if (contains_pending_extent(device, &start, diff)) {
+ if (first_pending_extent(device, start, diff, &pending_start, &pending_end)) {
mutex_unlock(&fs_info->chunk_mutex);
ret = btrfs_commit_transaction(trans);
if (ret)
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index d1db7fa1fe58..3afc9c0c2228 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1809,14 +1809,14 @@ bool btrfs_use_zone_append(struct btrfs_bio *bbio)
{
u64 start = (bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT);
struct btrfs_inode *inode = bbio->inode;
- struct btrfs_fs_info *fs_info = bbio->fs_info;
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_block_group *cache;
bool ret = false;

if (!btrfs_is_zoned(fs_info))
return false;

- if (!inode || !is_data_inode(inode))
+ if (!is_data_inode(inode))
return false;

if (btrfs_op(&bbio->bio) != BTRFS_MAP_WRITE)
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index 17c5656580dd..2b807a02d1a8 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -15,7 +15,6 @@
#include "disk-io.h"
#include "block-group.h"
#include "btrfs_inode.h"
-#include "fs.h"

struct block_device;
struct extent_buffer;
diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c
index a47c6bab98ff..5b77ee8cc99f 100644
--- a/fs/erofs/fileio.c
+++ b/fs/erofs/fileio.c
@@ -25,23 +25,19 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret)
container_of(iocb, struct erofs_fileio_rq, iocb);
struct folio_iter fi;

- if (ret > 0) {
- if (ret != rq->bio.bi_iter.bi_size) {
- bio_advance(&rq->bio, ret);
- zero_fill_bio(&rq->bio);
- }
- ret = 0;
+ if (ret >= 0 && ret != rq->bio.bi_iter.bi_size) {
+ bio_advance(&rq->bio, ret);
+ zero_fill_bio(&rq->bio);
}
- if (rq->bio.bi_end_io) {
- if (ret < 0 && !rq->bio.bi_status)
- rq->bio.bi_status = errno_to_blk_status(ret);
- rq->bio.bi_end_io(&rq->bio);
- } else {
+ if (!rq->bio.bi_end_io) {
bio_for_each_folio_all(fi, &rq->bio) {
DBG_BUGON(folio_test_uptodate(fi.folio));
- erofs_onlinefolio_end(fi.folio, ret, false);
+ erofs_onlinefolio_end(fi.folio, ret < 0, false);
}
+ } else if (ret < 0 && !rq->bio.bi_status) {
+ rq->bio.bi_status = errno_to_blk_status(ret);
}
+ bio_endio(&rq->bio);
bio_uninit(&rq->bio);
if (refcount_dec_and_test(&rq->ref))
kfree(rq);
@@ -51,7 +47,7 @@ static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq)
{
const struct cred *old_cred;
struct iov_iter iter;
- int ret;
+ ssize_t ret;

if (!rq)
return;
diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
index 362acf828279..7a346e20f7b7 100644
--- a/fs/erofs/fscache.c
+++ b/fs/erofs/fscache.c
@@ -185,7 +185,7 @@ static void erofs_fscache_bio_endio(void *priv, ssize_t transferred_or_error)

if (IS_ERR_VALUE(transferred_or_error))
io->bio.bi_status = errno_to_blk_status(transferred_or_error);
- io->bio.bi_end_io(&io->bio);
+ bio_endio(&io->bio);
BUILD_BUG_ON(offsetof(struct erofs_fscache_bio, io) != 0);
erofs_fscache_io_put(&io->io);
}
@@ -216,7 +216,7 @@ void erofs_fscache_submit_bio(struct bio *bio)
if (!ret)
return;
bio->bi_status = errno_to_blk_status(ret);
- bio->bi_end_io(bio);
+ bio_endio(bio);
}

static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 683703aee5ef..98e44570841a 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -805,14 +805,26 @@ static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe)
struct erofs_map_blocks *map = &fe->map;
struct super_block *sb = fe->inode->i_sb;
struct z_erofs_pcluster *pcl = NULL;
- void *ptr;
+ void *ptr = NULL;
int ret;

DBG_BUGON(fe->pcl);
/* must be Z_EROFS_PCLUSTER_TAIL or pointed to previous pcluster */
DBG_BUGON(!fe->head);

- if (!(map->m_flags & EROFS_MAP_META)) {
+ if (map->m_flags & EROFS_MAP_META) {
+ ret = erofs_init_metabuf(&map->buf, sb,
+ erofs_inode_in_metabox(fe->inode));
+ if (ret)
+ return ret;
+ ptr = erofs_bread(&map->buf, map->m_pa, false);
+ if (IS_ERR(ptr)) {
+ erofs_err(sb, "failed to read inline data %pe @ pa %llu of nid %llu",
+ ptr, map->m_pa, EROFS_I(fe->inode)->nid);
+ return PTR_ERR(ptr);
+ }
+ ptr = map->buf.page;
+ } else {
while (1) {
rcu_read_lock();
pcl = xa_load(&EROFS_SB(sb)->managed_pslots, map->m_pa);
@@ -852,18 +864,8 @@ static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe)
/* bind cache first when cached decompression is preferred */
z_erofs_bind_cache(fe);
} else {
- ret = erofs_init_metabuf(&map->buf, sb,
- erofs_inode_in_metabox(fe->inode));
- if (ret)
- return ret;
- ptr = erofs_bread(&map->buf, map->m_pa, false);
- if (IS_ERR(ptr)) {
- ret = PTR_ERR(ptr);
- erofs_err(sb, "failed to get inline folio %d", ret);
- return ret;
- }
- folio_get(page_folio(map->buf.page));
- WRITE_ONCE(fe->pcl->compressed_bvecs[0].page, map->buf.page);
+ folio_get(page_folio((struct page *)ptr));
+ WRITE_ONCE(fe->pcl->compressed_bvecs[0].page, ptr);
fe->pcl->pageofs_in = map->m_pa & ~PAGE_MASK;
fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
}
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 57087da6c7be..933297251f66 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1771,6 +1771,10 @@ struct ext4_sb_info {
* Main fast commit lock. This lock protects accesses to the
* following fields:
* ei->i_fc_list, s_fc_dentry_q, s_fc_q, s_fc_bytes, s_fc_bh.
+ *
+ * s_fc_lock can be taken from reclaim context (inode eviction) and is
+ * thus reclaim unsafe. Use ext4_fc_lock()/ext4_fc_unlock() helpers
+ * when acquiring / releasing the lock.
*/
struct mutex s_fc_lock;
struct buffer_head *s_fc_bh;
@@ -1815,6 +1819,18 @@ static inline void ext4_writepages_up_write(struct super_block *sb, int ctx)
percpu_up_write(&EXT4_SB(sb)->s_writepages_rwsem);
}

+static inline int ext4_fc_lock(struct super_block *sb)
+{
+ mutex_lock(&EXT4_SB(sb)->s_fc_lock);
+ return memalloc_nofs_save();
+}
+
+static inline void ext4_fc_unlock(struct super_block *sb, int ctx)
+{
+ memalloc_nofs_restore(ctx);
+ mutex_unlock(&EXT4_SB(sb)->s_fc_lock);
+}
+
static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
{
return ino == EXT4_ROOT_INO ||
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index ca5499e9412b..ae7f2d6b32e3 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -43,8 +43,13 @@
#define EXT4_EXT_MARK_UNWRIT1 0x2 /* mark first half unwritten */
#define EXT4_EXT_MARK_UNWRIT2 0x4 /* mark second half unwritten */

-#define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */
-#define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */
+/* first half contains valid data */
+#define EXT4_EXT_DATA_ENTIRE_VALID1 0x8 /* has entirely valid data */
+#define EXT4_EXT_DATA_PARTIAL_VALID1 0x10 /* has partially valid data */
+#define EXT4_EXT_DATA_VALID1 (EXT4_EXT_DATA_ENTIRE_VALID1 | \
+ EXT4_EXT_DATA_PARTIAL_VALID1)
+
+#define EXT4_EXT_DATA_VALID2 0x20 /* second half contains valid data */

static __le32 ext4_extent_block_csum(struct inode *inode,
struct ext4_extent_header *eh)
@@ -3190,8 +3195,12 @@ static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle,
unsigned int ee_len, depth;
int err = 0;

- BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) ==
- (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2));
+ BUG_ON((split_flag & EXT4_EXT_DATA_VALID1) == EXT4_EXT_DATA_VALID1);
+ BUG_ON((split_flag & EXT4_EXT_DATA_VALID1) &&
+ (split_flag & EXT4_EXT_DATA_VALID2));
+
+ /* Do not cache extents that are in the process of being modified. */
+ flags |= EXT4_EX_NOCACHE;

ext_debug(inode, "logical block %llu\n", (unsigned long long)split);

@@ -3258,7 +3267,7 @@ static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle,

err = PTR_ERR(path);
if (err != -ENOSPC && err != -EDQUOT && err != -ENOMEM)
- return path;
+ goto out_path;

/*
* Get a new path to try to zeroout or fix the extent length.
@@ -3272,7 +3281,7 @@ static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle,
if (IS_ERR(path)) {
EXT4_ERROR_INODE(inode, "Failed split extent on %u, err %ld",
split, PTR_ERR(path));
- return path;
+ goto out_path;
}
depth = ext_depth(inode);
ex = path[depth].p_ext;
@@ -3304,6 +3313,23 @@ static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle,
}

if (!err) {
+ /*
+ * The first half contains partially valid data, the
+ * splitting of this extent has not been completed, fix the
+ * extent length and ext4_split_extent() will split the
+ * first half again.
+ */
+ if (split_flag & EXT4_EXT_DATA_PARTIAL_VALID1) {
+ /*
+ * Drop extent cache to prevent stale unwritten
+ * extents remaining after zeroing out.
+ */
+ ext4_es_remove_extent(inode,
+ le32_to_cpu(zero_ex.ee_block),
+ ext4_ext_get_actual_len(&zero_ex));
+ goto fix_extent_len;
+ }
+
/* update the extent length and mark as initialized */
ex->ee_len = cpu_to_le16(ee_len);
ext4_ext_try_to_merge(handle, inode, path, ex);
@@ -3332,6 +3358,10 @@ static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle,
ext4_free_ext_path(path);
path = ERR_PTR(err);
}
+out_path:
+ if (IS_ERR(path))
+ /* Remove all remaining potentially stale extents. */
+ ext4_es_remove_extent(inode, ee_block, ee_len);
ext4_ext_show_leaf(inode, path);
return path;
}
@@ -3366,6 +3396,9 @@ static struct ext4_ext_path *ext4_split_extent(handle_t *handle,
ee_len = ext4_ext_get_actual_len(ex);
unwritten = ext4_ext_is_unwritten(ex);

+ /* Do not cache extents that are in the process of being modified. */
+ flags |= EXT4_EX_NOCACHE;
+
if (map->m_lblk + map->m_len < ee_block + ee_len) {
split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT;
flags1 = flags | EXT4_GET_BLOCKS_PRE_IO;
@@ -3373,7 +3406,9 @@ static struct ext4_ext_path *ext4_split_extent(handle_t *handle,
split_flag1 |= EXT4_EXT_MARK_UNWRIT1 |
EXT4_EXT_MARK_UNWRIT2;
if (split_flag & EXT4_EXT_DATA_VALID2)
- split_flag1 |= EXT4_EXT_DATA_VALID1;
+ split_flag1 |= map->m_lblk > ee_block ?
+ EXT4_EXT_DATA_PARTIAL_VALID1 :
+ EXT4_EXT_DATA_ENTIRE_VALID1;
path = ext4_split_extent_at(handle, inode, path,
map->m_lblk + map->m_len, split_flag1, flags1);
if (IS_ERR(path))
@@ -3732,7 +3767,7 @@ static struct ext4_ext_path *ext4_split_convert_extents(handle_t *handle,

/* Convert to unwritten */
if (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN) {
- split_flag |= EXT4_EXT_DATA_VALID1;
+ split_flag |= EXT4_EXT_DATA_ENTIRE_VALID1;
/* Convert to initialized */
} else if (flags & EXT4_GET_BLOCKS_CONVERT) {
split_flag |= ee_block + ee_len <= eof_block ?
@@ -5375,7 +5410,8 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
if (!extent) {
EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
(unsigned long) *iterator);
- return -EFSCORRUPTED;
+ ret = -EFSCORRUPTED;
+ goto out;
}
if (SHIFT == SHIFT_LEFT && *iterator >
le32_to_cpu(extent->ee_block)) {
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index fa66b08de999..5bd57d7f921b 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -231,16 +231,16 @@ static bool ext4_fc_disabled(struct super_block *sb)
void ext4_fc_del(struct inode *inode)
{
struct ext4_inode_info *ei = EXT4_I(inode);
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_fc_dentry_update *fc_dentry;
wait_queue_head_t *wq;
+ int alloc_ctx;

if (ext4_fc_disabled(inode->i_sb))
return;

- mutex_lock(&sbi->s_fc_lock);
+ alloc_ctx = ext4_fc_lock(inode->i_sb);
if (list_empty(&ei->i_fc_list) && list_empty(&ei->i_fc_dilist)) {
- mutex_unlock(&sbi->s_fc_lock);
+ ext4_fc_unlock(inode->i_sb, alloc_ctx);
return;
}

@@ -275,9 +275,9 @@ void ext4_fc_del(struct inode *inode)
#endif
prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
if (ext4_test_inode_state(inode, EXT4_STATE_FC_FLUSHING_DATA)) {
- mutex_unlock(&sbi->s_fc_lock);
+ ext4_fc_unlock(inode->i_sb, alloc_ctx);
schedule();
- mutex_lock(&sbi->s_fc_lock);
+ alloc_ctx = ext4_fc_lock(inode->i_sb);
}
finish_wait(wq, &wait.wq_entry);
}
@@ -288,7 +288,7 @@ void ext4_fc_del(struct inode *inode)
* dentry create references, since it is not needed to log it anyways.
*/
if (list_empty(&ei->i_fc_dilist)) {
- mutex_unlock(&sbi->s_fc_lock);
+ ext4_fc_unlock(inode->i_sb, alloc_ctx);
return;
}

@@ -298,7 +298,7 @@ void ext4_fc_del(struct inode *inode)
list_del_init(&fc_dentry->fcd_dilist);

WARN_ON(!list_empty(&ei->i_fc_dilist));
- mutex_unlock(&sbi->s_fc_lock);
+ ext4_fc_unlock(inode->i_sb, alloc_ctx);

release_dentry_name_snapshot(&fc_dentry->fcd_name);
kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
@@ -315,6 +315,7 @@ void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handl
tid_t tid;
bool has_transaction = true;
bool is_ineligible;
+ int alloc_ctx;

if (ext4_fc_disabled(sb))
return;
@@ -329,12 +330,12 @@ void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handl
has_transaction = false;
read_unlock(&sbi->s_journal->j_state_lock);
}
- mutex_lock(&sbi->s_fc_lock);
+ alloc_ctx = ext4_fc_lock(sb);
is_ineligible = ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
if (has_transaction && (!is_ineligible || tid_gt(tid, sbi->s_fc_ineligible_tid)))
sbi->s_fc_ineligible_tid = tid;
ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
- mutex_unlock(&sbi->s_fc_lock);
+ ext4_fc_unlock(sb, alloc_ctx);
WARN_ON(reason >= EXT4_FC_REASON_MAX);
sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
}
@@ -358,6 +359,7 @@ static int ext4_fc_track_template(
struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
tid_t tid = 0;
+ int alloc_ctx;
int ret;

tid = handle->h_transaction->t_tid;
@@ -373,14 +375,14 @@ static int ext4_fc_track_template(
if (!enqueue)
return ret;

- mutex_lock(&sbi->s_fc_lock);
+ alloc_ctx = ext4_fc_lock(inode->i_sb);
if (list_empty(&EXT4_I(inode)->i_fc_list))
list_add_tail(&EXT4_I(inode)->i_fc_list,
(sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) ?
&sbi->s_fc_q[FC_Q_STAGING] :
&sbi->s_fc_q[FC_Q_MAIN]);
- mutex_unlock(&sbi->s_fc_lock);
+ ext4_fc_unlock(inode->i_sb, alloc_ctx);

return ret;
}
@@ -402,6 +404,7 @@ static int __track_dentry_update(handle_t *handle, struct inode *inode,
struct inode *dir = dentry->d_parent->d_inode;
struct super_block *sb = inode->i_sb;
struct ext4_sb_info *sbi = EXT4_SB(sb);
+ int alloc_ctx;

spin_unlock(&ei->i_fc_lock);

@@ -425,7 +428,7 @@ static int __track_dentry_update(handle_t *handle, struct inode *inode,
take_dentry_name_snapshot(&node->fcd_name, dentry);
INIT_LIST_HEAD(&node->fcd_dilist);
INIT_LIST_HEAD(&node->fcd_list);
- mutex_lock(&sbi->s_fc_lock);
+ alloc_ctx = ext4_fc_lock(sb);
if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING)
list_add_tail(&node->fcd_list,
@@ -446,7 +449,7 @@ static int __track_dentry_update(handle_t *handle, struct inode *inode,
WARN_ON(!list_empty(&ei->i_fc_dilist));
list_add_tail(&node->fcd_dilist, &ei->i_fc_dilist);
}
- mutex_unlock(&sbi->s_fc_lock);
+ ext4_fc_unlock(sb, alloc_ctx);
spin_lock(&ei->i_fc_lock);

return 0;
@@ -1046,18 +1049,19 @@ static int ext4_fc_perform_commit(journal_t *journal)
struct blk_plug plug;
int ret = 0;
u32 crc = 0;
+ int alloc_ctx;

/*
* Step 1: Mark all inodes on s_fc_q[MAIN] with
* EXT4_STATE_FC_FLUSHING_DATA. This prevents these inodes from being
* freed until the data flush is over.
*/
- mutex_lock(&sbi->s_fc_lock);
+ alloc_ctx = ext4_fc_lock(sb);
list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
ext4_set_inode_state(&iter->vfs_inode,
EXT4_STATE_FC_FLUSHING_DATA);
}
- mutex_unlock(&sbi->s_fc_lock);
+ ext4_fc_unlock(sb, alloc_ctx);

/* Step 2: Flush data for all the eligible inodes. */
ret = ext4_fc_flush_data(journal);
@@ -1067,7 +1071,7 @@ static int ext4_fc_perform_commit(journal_t *journal)
* any error from step 2. This ensures that waiters waiting on
* EXT4_STATE_FC_FLUSHING_DATA can resume.
*/
- mutex_lock(&sbi->s_fc_lock);
+ alloc_ctx = ext4_fc_lock(sb);
list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
ext4_clear_inode_state(&iter->vfs_inode,
EXT4_STATE_FC_FLUSHING_DATA);
@@ -1084,7 +1088,7 @@ static int ext4_fc_perform_commit(journal_t *journal)
* prepare_to_wait() in ext4_fc_del().
*/
smp_mb();
- mutex_unlock(&sbi->s_fc_lock);
+ ext4_fc_unlock(sb, alloc_ctx);

/*
* If we encountered error in Step 2, return it now after clearing
@@ -1101,12 +1105,12 @@ static int ext4_fc_perform_commit(journal_t *journal)
* previous handles are now drained. We now mark the inodes on the
* commit queue as being committed.
*/
- mutex_lock(&sbi->s_fc_lock);
+ alloc_ctx = ext4_fc_lock(sb);
list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
ext4_set_inode_state(&iter->vfs_inode,
EXT4_STATE_FC_COMMITTING);
}
- mutex_unlock(&sbi->s_fc_lock);
+ ext4_fc_unlock(sb, alloc_ctx);
jbd2_journal_unlock_updates(journal);

/*
@@ -1117,6 +1121,7 @@ static int ext4_fc_perform_commit(journal_t *journal)
blkdev_issue_flush(journal->j_fs_dev);

blk_start_plug(&plug);
+ alloc_ctx = ext4_fc_lock(sb);
/* Step 6: Write fast commit blocks to disk. */
if (sbi->s_fc_bytes == 0) {
/*
@@ -1134,7 +1139,6 @@ static int ext4_fc_perform_commit(journal_t *journal)
}

/* Step 6.2: Now write all the dentry updates. */
- mutex_lock(&sbi->s_fc_lock);
ret = ext4_fc_commit_dentry_updates(journal, &crc);
if (ret)
goto out;
@@ -1156,7 +1160,7 @@ static int ext4_fc_perform_commit(journal_t *journal)
ret = ext4_fc_write_tail(sb, crc);

out:
- mutex_unlock(&sbi->s_fc_lock);
+ ext4_fc_unlock(sb, alloc_ctx);
blk_finish_plug(&plug);
return ret;
}
@@ -1290,6 +1294,7 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_inode_info *ei;
struct ext4_fc_dentry_update *fc_dentry;
+ int alloc_ctx;

if (full && sbi->s_fc_bh)
sbi->s_fc_bh = NULL;
@@ -1297,7 +1302,7 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
trace_ext4_fc_cleanup(journal, full, tid);
jbd2_fc_release_bufs(journal);

- mutex_lock(&sbi->s_fc_lock);
+ alloc_ctx = ext4_fc_lock(sb);
while (!list_empty(&sbi->s_fc_q[FC_Q_MAIN])) {
ei = list_first_entry(&sbi->s_fc_q[FC_Q_MAIN],
struct ext4_inode_info,
@@ -1356,7 +1361,7 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)

if (full)
sbi->s_fc_bytes = 0;
- mutex_unlock(&sbi->s_fc_lock);
+ ext4_fc_unlock(sb, alloc_ctx);
trace_ext4_fc_stats(sb);
}

diff --git a/fs/ext4/mballoc-test.c b/fs/ext4/mballoc-test.c
index a9416b20ff64..4abb40d4561c 100644
--- a/fs/ext4/mballoc-test.c
+++ b/fs/ext4/mballoc-test.c
@@ -567,7 +567,7 @@ test_mark_diskspace_used_range(struct kunit *test,

bitmap = mbt_ctx_bitmap(sb, TEST_GOAL_GROUP);
memset(bitmap, 0, sb->s_blocksize);
- ret = ext4_mb_mark_diskspace_used(ac, NULL, 0);
+ ret = ext4_mb_mark_diskspace_used(ac, NULL);
KUNIT_ASSERT_EQ(test, ret, 0);

max = EXT4_CLUSTERS_PER_GROUP(sb);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 65335248825c..412289e5c0af 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -892,6 +892,21 @@ mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp)
}
}

+static ext4_group_t ext4_get_allocation_groups_count(
+ struct ext4_allocation_context *ac)
+{
+ ext4_group_t ngroups = ext4_get_groups_count(ac->ac_sb);
+
+ /* non-extent files are limited to low blocks/groups */
+ if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
+ ngroups = EXT4_SB(ac->ac_sb)->s_blockfile_groups;
+
+ /* Pairs with smp_wmb() in ext4_update_super() */
+ smp_rmb();
+
+ return ngroups;
+}
+
static int ext4_mb_scan_groups_xa_range(struct ext4_allocation_context *ac,
struct xarray *xa,
ext4_group_t start, ext4_group_t end)
@@ -899,7 +914,7 @@ static int ext4_mb_scan_groups_xa_range(struct ext4_allocation_context *ac,
struct super_block *sb = ac->ac_sb;
struct ext4_sb_info *sbi = EXT4_SB(sb);
enum criteria cr = ac->ac_criteria;
- ext4_group_t ngroups = ext4_get_groups_count(sb);
+ ext4_group_t ngroups = ext4_get_allocation_groups_count(ac);
unsigned long group = start;
struct ext4_group_info *grp;

@@ -951,7 +966,7 @@ static int ext4_mb_scan_groups_p2_aligned(struct ext4_allocation_context *ac,
ext4_group_t start, end;

start = group;
- end = ext4_get_groups_count(ac->ac_sb);
+ end = ext4_get_allocation_groups_count(ac);
wrap_around:
for (i = ac->ac_2order; i < MB_NUM_ORDERS(ac->ac_sb); i++) {
ret = ext4_mb_scan_groups_largest_free_order_range(ac, i,
@@ -1001,7 +1016,7 @@ static int ext4_mb_scan_groups_goal_fast(struct ext4_allocation_context *ac,
ext4_group_t start, end;

start = group;
- end = ext4_get_groups_count(ac->ac_sb);
+ end = ext4_get_allocation_groups_count(ac);
wrap_around:
i = mb_avg_fragment_size_order(ac->ac_sb, ac->ac_g_ex.fe_len);
for (; i < MB_NUM_ORDERS(ac->ac_sb); i++) {
@@ -1083,7 +1098,7 @@ static int ext4_mb_scan_groups_best_avail(struct ext4_allocation_context *ac,
min_order = fls(ac->ac_o_ex.fe_len);

start = group;
- end = ext4_get_groups_count(ac->ac_sb);
+ end = ext4_get_allocation_groups_count(ac);
wrap_around:
for (i = order; i >= min_order; i--) {
int frag_order;
@@ -1133,8 +1148,6 @@ static inline int should_optimize_scan(struct ext4_allocation_context *ac)
return 0;
if (ac->ac_criteria >= CR_GOAL_LEN_SLOW)
return 0;
- if (!ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS))
- return 0;
return 1;
}

@@ -1182,11 +1195,7 @@ static int ext4_mb_scan_groups(struct ext4_allocation_context *ac)
int ret = 0;
ext4_group_t start;
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
- ext4_group_t ngroups = ext4_get_groups_count(ac->ac_sb);
-
- /* non-extent files are limited to low blocks/groups */
- if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
- ngroups = sbi->s_blockfile_groups;
+ ext4_group_t ngroups = ext4_get_allocation_groups_count(ac);

/* searching for the right group start from the goal value specified */
start = ac->ac_g_ex.fe_group;
@@ -1712,16 +1721,17 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,

/* Avoid locking the folio in the fast path ... */
folio = __filemap_get_folio(inode->i_mapping, pnum, FGP_ACCESSED, 0);
- if (IS_ERR(folio) || !folio_test_uptodate(folio)) {
+ if (IS_ERR(folio) || !folio_test_uptodate(folio) || folio_test_locked(folio)) {
+ /*
+ * folio_test_locked is employed to detect ongoing folio
+ * migrations, since concurrent migrations can lead to
+ * bitmap inconsistency. And if we are not uptodate that
+ * implies somebody just created the folio but is yet to
+ * initialize it. We can drop the folio reference and
+ * try to get the folio with lock in both cases to avoid
+ * concurrency.
+ */
if (!IS_ERR(folio))
- /*
- * drop the folio reference and try
- * to get the folio with lock. If we
- * are not uptodate that implies
- * somebody just created the folio but
- * is yet to initialize it. So
- * wait for it to initialize.
- */
folio_put(folio);
folio = __filemap_get_folio(inode->i_mapping, pnum,
FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp);
@@ -1763,7 +1773,7 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
poff = block % blocks_per_page;

folio = __filemap_get_folio(inode->i_mapping, pnum, FGP_ACCESSED, 0);
- if (IS_ERR(folio) || !folio_test_uptodate(folio)) {
+ if (IS_ERR(folio) || !folio_test_uptodate(folio) || folio_test_locked(folio)) {
if (!IS_ERR(folio))
folio_put(folio);
folio = __filemap_get_folio(inode->i_mapping, pnum,
@@ -4180,8 +4190,7 @@ ext4_mb_mark_context(handle_t *handle, struct super_block *sb, bool state,
* Returns 0 if success or error code
*/
static noinline_for_stack int
-ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
- handle_t *handle, unsigned int reserv_clstrs)
+ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, handle_t *handle)
{
struct ext4_group_desc *gdp;
struct ext4_sb_info *sbi;
@@ -4236,13 +4245,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
BUG_ON(changed != ac->ac_b_ex.fe_len);
#endif
percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
- /*
- * Now reduce the dirty block count also. Should not go negative
- */
- if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
- /* release all the reserved blocks if non delalloc */
- percpu_counter_sub(&sbi->s_dirtyclusters_counter,
- reserv_clstrs);

return err;
}
@@ -6327,7 +6329,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
ext4_mb_pa_put_free(ac);
}
if (likely(ac->ac_status == AC_STATUS_FOUND)) {
- *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
+ *errp = ext4_mb_mark_diskspace_used(ac, handle);
if (*errp) {
ext4_discard_allocated_blocks(ac);
goto errout;
@@ -6358,12 +6360,9 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
out:
if (inquota && ar->len < inquota)
dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
- if (!ar->len) {
- if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0)
- /* release all the reserved blocks if non delalloc */
- percpu_counter_sub(&sbi->s_dirtyclusters_counter,
- reserv_clstrs);
- }
+ /* release any reserved blocks */
+ if (reserv_clstrs)
+ percpu_counter_sub(&sbi->s_dirtyclusters_counter, reserv_clstrs);

trace_ext4_allocate_blocks(ar, (unsigned long long)block);

diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 0b920ee40a7f..262ec1b790b5 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -325,7 +325,12 @@ static int msdos_rmdir(struct inode *dir, struct dentry *dentry)
err = fat_remove_entries(dir, &sinfo); /* and releases bh */
if (err)
goto out;
- drop_nlink(dir);
+ if (dir->i_nlink >= 3)
+ drop_nlink(dir);
+ else {
+ fat_fs_error(sb, "parent dir link count too low (%u)",
+ dir->i_nlink);
+ }

clear_nlink(inode);
fat_truncate_time(inode, NULL, S_CTIME);
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 5dbc4cbb8fce..47ff083cfc7e 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -803,7 +803,12 @@ static int vfat_rmdir(struct inode *dir, struct dentry *dentry)
err = fat_remove_entries(dir, &sinfo); /* and releases bh */
if (err)
goto out;
- drop_nlink(dir);
+ if (dir->i_nlink >= 3)
+ drop_nlink(dir);
+ else {
+ fat_fs_error(sb, "parent dir link count too low (%u)",
+ dir->i_nlink);
+ }

clear_nlink(inode);
fat_truncate_time(inode, NULL, S_ATIME|S_MTIME);
diff --git a/fs/file_table.c b/fs/file_table.c
index cd4a3db4659a..34244fccf2ed 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -176,6 +176,11 @@ static int init_file(struct file *f, int flags, const struct cred *cred)

f->f_flags = flags;
f->f_mode = OPEN_FMODE(flags);
+ /*
+ * Disable permission and pre-content events for all files by default.
+ * They may be enabled later by fsnotify_open_perm_and_set_mode().
+ */
+ file_set_fsnotify_mode(f, FMODE_NONOTIFY_PERM);

f->f_op = NULL;
f->f_mapping = NULL;
@@ -197,11 +202,6 @@ static int init_file(struct file *f, int flags, const struct cred *cred)
* refcount bumps we should reinitialize the reused file first.
*/
file_ref_init(&f->f_ref, 1);
- /*
- * Disable permission and pre-content events for all files by default.
- * They may be enabled later by fsnotify_open_perm_and_set_mode().
- */
- file_set_fsnotify_mode(f, FMODE_NONOTIFY_PERM);
return 0;
}

diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index 28be762ac1c6..a0b40ad5e742 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -6,6 +6,7 @@
#include <linux/path.h>
#include <linux/slab.h>
#include <linux/fs_struct.h>
+#include <linux/init_task.h>
#include "internal.h"

/*
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 131091520de6..fdcac8e3f2ba 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1127,10 +1127,18 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
goto out_unlock;
break;
default:
- goto out_unlock;
+ goto out;
}

ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp);
+ if (ret)
+ goto out_unlock;
+
+out:
+ if (iomap->type == IOMAP_INLINE) {
+ iomap->private = metapath_dibh(&mp);
+ get_bh(iomap->private);
+ }

out_unlock:
release_metapath(&mp);
@@ -1144,6 +1152,9 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);

+ if (iomap->private)
+ brelse(iomap->private);
+
switch (flags & (IOMAP_WRITE | IOMAP_ZERO)) {
case IOMAP_WRITE:
if (flags & IOMAP_DIRECT)
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 9f2eb7e38569..7aec6cfdfd91 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1354,31 +1354,45 @@ static int glocks_pending(unsigned int num_gh, struct gfs2_holder *ghs)
* gfs2_glock_async_wait - wait on multiple asynchronous glock acquisitions
* @num_gh: the number of holders in the array
* @ghs: the glock holder array
+ * @retries: number of retries attempted so far
*
* Returns: 0 on success, meaning all glocks have been granted and are held.
* -ESTALE if the request timed out, meaning all glocks were released,
* and the caller should retry the operation.
*/

-int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs)
+int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs,
+ unsigned int retries)
{
struct gfs2_sbd *sdp = ghs[0].gh_gl->gl_name.ln_sbd;
- int i, ret = 0, timeout = 0;
unsigned long start_time = jiffies;
+ int i, ret = 0;
+ long timeout;

might_sleep();
- /*
- * Total up the (minimum hold time * 2) of all glocks and use that to
- * determine the max amount of time we should wait.
- */
- for (i = 0; i < num_gh; i++)
- timeout += ghs[i].gh_gl->gl_hold_time << 1;

- if (!wait_event_timeout(sdp->sd_async_glock_wait,
+ timeout = GL_GLOCK_MIN_HOLD;
+ if (retries) {
+ unsigned int max_shift;
+ long incr;
+
+ /* Add a random delay and increase the timeout exponentially. */
+ max_shift = BITS_PER_LONG - 2 - __fls(GL_GLOCK_HOLD_INCR);
+ incr = min(GL_GLOCK_HOLD_INCR << min(retries - 1, max_shift),
+ 10 * HZ - GL_GLOCK_MIN_HOLD);
+ schedule_timeout_interruptible(get_random_long() % (incr / 3));
+ if (signal_pending(current))
+ goto interrupted;
+ timeout += (incr / 3) + get_random_long() % (incr / 3);
+ }
+
+ if (!wait_event_interruptible_timeout(sdp->sd_async_glock_wait,
!glocks_pending(num_gh, ghs), timeout)) {
ret = -ESTALE; /* request timed out. */
goto out;
}
+ if (signal_pending(current))
+ goto interrupted;

for (i = 0; i < num_gh; i++) {
struct gfs2_holder *gh = &ghs[i];
@@ -1402,6 +1416,10 @@ int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs)
}
}
return ret;
+
+interrupted:
+ ret = -EINTR;
+ goto out;
}

/**
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index d041b922b45e..2d4fd1a2bbbb 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -204,7 +204,8 @@ int gfs2_glock_poll(struct gfs2_holder *gh);
int gfs2_instantiate(struct gfs2_holder *gh);
int gfs2_glock_holder_ready(struct gfs2_holder *gh);
int gfs2_glock_wait(struct gfs2_holder *gh);
-int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs);
+int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs,
+ unsigned int retries);
void gfs2_glock_dq(struct gfs2_holder *gh);
void gfs2_glock_dq_wait(struct gfs2_holder *gh);
void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index d7e35a05c161..63d9fe746434 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1495,7 +1495,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
unsigned int num_gh;
int dir_rename = 0;
struct gfs2_diradd da = { .nr_blocks = 0, .save_loc = 0, };
- unsigned int x;
+ unsigned int retries = 0, x;
int error;

gfs2_holder_mark_uninitialized(&r_gh);
@@ -1545,12 +1545,17 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
num_gh++;
}

+again:
for (x = 0; x < num_gh; x++) {
error = gfs2_glock_nq(ghs + x);
if (error)
goto out_gunlock;
}
- error = gfs2_glock_async_wait(num_gh, ghs);
+ error = gfs2_glock_async_wait(num_gh, ghs, retries);
+ if (error == -ESTALE) {
+ retries++;
+ goto again;
+ }
if (error)
goto out_gunlock;

@@ -1739,7 +1744,7 @@ static int gfs2_exchange(struct inode *odir, struct dentry *odentry,
struct gfs2_sbd *sdp = GFS2_SB(odir);
struct gfs2_holder ghs[4], r_gh;
unsigned int num_gh;
- unsigned int x;
+ unsigned int retries = 0, x;
umode_t old_mode = oip->i_inode.i_mode;
umode_t new_mode = nip->i_inode.i_mode;
int error;
@@ -1783,13 +1788,18 @@ static int gfs2_exchange(struct inode *odir, struct dentry *odentry,
gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, GL_ASYNC, ghs + num_gh);
num_gh++;

+again:
for (x = 0; x < num_gh; x++) {
error = gfs2_glock_nq(ghs + x);
if (error)
goto out_gunlock;
}

- error = gfs2_glock_async_wait(num_gh, ghs);
+ error = gfs2_glock_async_wait(num_gh, ghs, retries);
+ if (error == -ESTALE) {
+ retries++;
+ goto again;
+ }
if (error)
goto out_gunlock;

diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index f2df01f801b8..898fc3937b44 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -334,6 +334,7 @@ static void qd_put(struct gfs2_quota_data *qd)
lockref_mark_dead(&qd->qd_lockref);
spin_unlock(&qd->qd_lockref.lock);

+ list_lru_del_obj(&gfs2_qd_lru, &qd->qd_lru);
gfs2_qd_dispose(qd);
return;
}
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index 482a6c5faa19..8e60e04c427b 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -629,7 +629,7 @@ struct hfs_bnode *hfs_bnode_create(struct hfs_btree *tree, u32 num)
if (node) {
pr_crit("new node %u already hashed?\n", num);
WARN_ON(1);
- return node;
+ return ERR_PTR(-EEXIST);
}
node = __hfs_bnode_create(tree, num);
if (!node)
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 6317e4cd4251..e73c71f39bd4 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -429,9 +429,13 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, BIO_MAX_VECS);
do {
size_t n;
- if (dio->error) {
- iov_iter_revert(dio->submit.iter, copied);
- copied = ret = 0;
+
+ /*
+ * If completions already occurred and reported errors, give up now and
+ * don't bother submitting more bios.
+ */
+ if (unlikely(data_race(dio->error))) {
+ ret = 0;
goto out;
}

diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 0ab83bb7bbdf..9ab3f2fc61d1 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -2903,7 +2903,7 @@ int jfs_readdir(struct file *file, struct dir_context *ctx)
stbl = DT_GETSTBL(p);

for (i = index; i < p->header.nextindex; i++) {
- if (stbl[i] < 0 || stbl[i] >= DTPAGEMAXSLOT) {
+ if (stbl[i] < 0) {
jfs_err("JFS: Invalid stbl[%d] = %d for inode %ld, block = %lld",
i, stbl[i], (long)ip->i_ino, (long long)bn);
free_page(dirent_buf);
@@ -3108,7 +3108,7 @@ static int dtReadFirst(struct inode *ip, struct btstack * btstack)
/* get the leftmost entry */
stbl = DT_GETSTBL(p);

- if (stbl[0] < 0 || stbl[0] >= DTPAGEMAXSLOT) {
+ if (stbl[0] < 0) {
DT_PUTPAGE(mp);
jfs_error(ip->i_sb, "stbl[0] out of bound\n");
return -EIO;
diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c
index fc9c3e0d34d8..29489a23a220 100644
--- a/fs/netfs/write_retry.c
+++ b/fs/netfs/write_retry.c
@@ -98,7 +98,6 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq,
subreq->start = start;
subreq->len = len;
__clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
- subreq->retry_count++;
trace_netfs_sreq(subreq, netfs_sreq_trace_retry);

/* Renegotiate max_len (wsize) */
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 3b8250ee0141..a653a401b797 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -72,7 +72,7 @@ const struct address_space_operations nfs_dir_aops = {
.free_folio = nfs_readdir_clear_array,
};

-#define NFS_INIT_DTSIZE PAGE_SIZE
+#define NFS_INIT_DTSIZE SZ_64K

static struct nfs_open_dir_context *
alloc_nfs_open_dir_context(struct inode *dir)
@@ -83,7 +83,7 @@ alloc_nfs_open_dir_context(struct inode *dir)
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT);
if (ctx != NULL) {
ctx->attr_gencount = nfsi->attr_gencount;
- ctx->dtsize = NFS_INIT_DTSIZE;
+ ctx->dtsize = min(NFS_SERVER(dir)->dtsize, NFS_INIT_DTSIZE);
spin_lock(&dir->i_lock);
if (list_empty(&nfsi->open_files) &&
(nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER))
diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c
index f537bc3386bf..358d686d2ae3 100644
--- a/fs/nfs/localio.c
+++ b/fs/nfs/localio.c
@@ -58,6 +58,11 @@ struct nfs_local_fsync_ctx {
static bool localio_enabled __read_mostly = true;
module_param(localio_enabled, bool, 0644);

+static int nfs_local_do_read(struct nfs_local_kiocb *iocb,
+ const struct rpc_call_ops *call_ops);
+static int nfs_local_do_write(struct nfs_local_kiocb *iocb,
+ const struct rpc_call_ops *call_ops);
+
static inline bool nfs_client_is_local(const struct nfs_client *clp)
{
return !!rcu_access_pointer(clp->cl_uuid.net);
@@ -286,6 +291,18 @@ nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
}
EXPORT_SYMBOL_GPL(nfs_local_open_fh);

+/*
+ * Ensure all page cache allocations are done from GFP_NOFS context to
+ * prevent direct reclaim recursion back into NFS via nfs_writepages.
+ */
+static void
+nfs_local_mapping_set_gfp_nofs_context(struct address_space *m)
+{
+ gfp_t gfp_mask = mapping_gfp_mask(m);
+
+ mapping_set_gfp_mask(m, (gfp_mask & ~(__GFP_FS)));
+}
+
static void
nfs_local_iocb_free(struct nfs_local_kiocb *iocb)
{
@@ -310,6 +327,7 @@ nfs_local_iocb_alloc(struct nfs_pgio_header *hdr,
return NULL;
}

+ nfs_local_mapping_set_gfp_nofs_context(file->f_mapping);
init_sync_kiocb(&iocb->kiocb, file);

iocb->hdr = hdr;
@@ -542,13 +560,50 @@ nfs_local_iocb_release(struct nfs_local_kiocb *iocb)
nfs_local_iocb_free(iocb);
}

-static void
-nfs_local_pgio_release(struct nfs_local_kiocb *iocb)
+static void nfs_local_pgio_restart(struct nfs_local_kiocb *iocb,
+ struct nfs_pgio_header *hdr)
+{
+ int status = 0;
+
+ iocb->kiocb.ki_pos = hdr->args.offset;
+ iocb->kiocb.ki_flags &= ~(IOCB_DSYNC | IOCB_SYNC | IOCB_DIRECT);
+ iocb->kiocb.ki_complete = NULL;
+ iocb->aio_complete_work = NULL;
+ iocb->end_iter_index = -1;
+
+ switch (hdr->rw_mode) {
+ case FMODE_READ:
+ nfs_local_iters_init(iocb, ITER_DEST);
+ status = nfs_local_do_read(iocb, hdr->task.tk_ops);
+ break;
+ case FMODE_WRITE:
+ nfs_local_iters_init(iocb, ITER_SOURCE);
+ status = nfs_local_do_write(iocb, hdr->task.tk_ops);
+ break;
+ default:
+ status = -EOPNOTSUPP;
+ }
+
+ if (status != 0) {
+ nfs_local_iocb_release(iocb);
+ hdr->task.tk_status = status;
+ nfs_local_hdr_release(hdr, hdr->task.tk_ops);
+ }
+}
+
+static void nfs_local_pgio_release(struct nfs_local_kiocb *iocb)
{
struct nfs_pgio_header *hdr = iocb->hdr;
+ struct rpc_task *task = &hdr->task;
+
+ task->tk_action = NULL;
+ task->tk_ops->rpc_call_done(task, hdr);

- nfs_local_iocb_release(iocb);
- nfs_local_hdr_release(hdr, hdr->task.tk_ops);
+ if (task->tk_action == NULL) {
+ nfs_local_iocb_release(iocb);
+ task->tk_ops->rpc_release(hdr);
+ } else
+ nfs_local_pgio_restart(iocb, hdr);
}

/*
@@ -776,19 +831,7 @@ static void nfs_local_write_done(struct nfs_local_kiocb *iocb)
pr_info_ratelimited("nfs: Unexpected direct I/O write alignment failure\n");
}

- /* Handle short writes as if they are ENOSPC */
- status = hdr->res.count;
- if (status > 0 && status < hdr->args.count) {
- hdr->mds_offset += status;
- hdr->args.offset += status;
- hdr->args.pgbase += status;
- hdr->args.count -= status;
- nfs_set_pgio_error(hdr, -ENOSPC, hdr->args.offset);
- status = -ENOSPC;
- /* record -ENOSPC in terms of nfs_local_pgio_done */
- (void) nfs_local_pgio_done(iocb, status, true);
- }
- if (hdr->task.tk_status < 0)
+ if (status < 0)
nfs_reset_boot_verifier(hdr->inode);
}

@@ -952,8 +995,6 @@ int nfs_local_doio(struct nfs_client *clp, struct nfsd_file *localio,
}

if (status != 0) {
- if (status == -EAGAIN)
- nfs_localio_disable_client(clp);
nfs_local_iocb_release(iocb);
hdr->task.tk_status = status;
nfs_local_hdr_release(hdr, call_ops);
@@ -980,6 +1021,8 @@ nfs_local_run_commit(struct file *filp, struct nfs_commit_data *data)
end = LLONG_MAX;
}

+ nfs_local_mapping_set_gfp_nofs_context(filp->f_mapping);
+
dprintk("%s: commit %llu - %llu\n", __func__, start, end);
return vfs_fsync_range(filp, start, end, 0);
}
@@ -1021,17 +1064,22 @@ nfs_local_fsync_ctx_free(struct nfs_local_fsync_ctx *ctx)
static void
nfs_local_fsync_work(struct work_struct *work)
{
+ unsigned long old_flags = current->flags;
struct nfs_local_fsync_ctx *ctx;
int status;

ctx = container_of(work, struct nfs_local_fsync_ctx, work);

+ current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
+
status = nfs_local_run_commit(nfs_to->nfsd_file_file(ctx->localio),
ctx->data);
nfs_local_commit_done(ctx->data, status);
if (ctx->done != NULL)
complete(ctx->done);
nfs_local_fsync_ctx_free(ctx);
+
+ current->flags = old_flags;
}

static struct nfs_local_fsync_ctx *
@@ -1055,7 +1103,7 @@ int nfs_local_commit(struct nfsd_file *localio,
{
struct nfs_local_fsync_ctx *ctx;

- ctx = nfs_local_fsync_ctx_alloc(data, localio, GFP_KERNEL);
+ ctx = nfs_local_fsync_ctx_alloc(data, localio, GFP_NOIO);
if (!ctx) {
nfs_local_commit_done(data, -ENOMEM);
nfs_local_release_commit_data(localio, data, call_ops);
@@ -1067,10 +1115,10 @@ int nfs_local_commit(struct nfsd_file *localio,
if (how & FLUSH_SYNC) {
DECLARE_COMPLETION_ONSTACK(done);
ctx->done = &done;
- queue_work(nfsiod_workqueue, &ctx->work);
+ queue_work(nfslocaliod_workqueue, &ctx->work);
wait_for_completion(&done);
} else
- queue_work(nfsiod_workqueue, &ctx->work);
+ queue_work(nfslocaliod_workqueue, &ctx->work);

return 0;
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 33bc6db0dc92..b3cb5ee9d821 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -463,7 +463,8 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
};
struct pnfs_layout_segment *lseg, *next;

- set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
+ if (test_and_set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags))
+ return !list_empty(&lo->plh_segs);
clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(lo->plh_inode)->flags);
list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
pnfs_clear_lseg_state(lseg, lseg_list);
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 5fb202acb0fd..0ac538c76180 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -45,7 +45,7 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst *rqstp)
inode = d_inode(fh->fh_dentry);

if (argp->mask & ~NFS_ACL_MASK) {
- resp->status = nfserr_inval;
+ resp->status = nfserr_io;
goto out;
}
resp->mask = argp->mask;
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 8cca1329f348..b5b3d45979c9 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -643,13 +643,31 @@ static __be32 encode_name_from_id(struct xdr_stream *xdr,
return idmap_id_to_name(xdr, rqstp, type, id);
}

-__be32
-nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen,
- kuid_t *uid)
+/**
+ * nfsd_map_name_to_uid - Map user@domain to local UID
+ * @rqstp: RPC execution context
+ * @name: user@domain name to be mapped
+ * @namelen: length of name, in bytes
+ * @uid: OUT: mapped local UID value
+ *
+ * Returns nfs_ok on success or an NFSv4 status code on failure.
+ */
+__be32 nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name,
+ size_t namelen, kuid_t *uid)
{
__be32 status;
u32 id = -1;

+ /*
+ * The idmap lookup below triggers an upcall that invokes
+ * cache_check(). RQ_USEDEFERRAL must be clear to prevent
+ * cache_check() from setting RQ_DROPME via svc_defer().
+ * NFSv4 servers are not permitted to drop requests. Also
+ * RQ_DROPME will force NFSv4.1 session slot processing to
+ * be skipped.
+ */
+ WARN_ON_ONCE(test_bit(RQ_USEDEFERRAL, &rqstp->rq_flags));
+
if (name == NULL || namelen == 0)
return nfserr_inval;

@@ -660,13 +678,31 @@ nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen,
return status;
}

-__be32
-nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen,
- kgid_t *gid)
+/**
+ * nfsd_map_name_to_gid - Map user@domain to local GID
+ * @rqstp: RPC execution context
+ * @name: user@domain name to be mapped
+ * @namelen: length of name, in bytes
+ * @gid: OUT: mapped local GID value
+ *
+ * Returns nfs_ok on success or an NFSv4 status code on failure.
+ */
+__be32 nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name,
+ size_t namelen, kgid_t *gid)
{
__be32 status;
u32 id = -1;

+ /*
+ * The idmap lookup below triggers an upcall that invokes
+ * cache_check(). RQ_USEDEFERRAL must be clear to prevent
+ * cache_check() from setting RQ_DROPME via svc_defer().
+ * NFSv4 servers are not permitted to drop requests. Also
+ * RQ_DROPME will force NFSv4.1 session slot processing to
+ * be skipped.
+ */
+ WARN_ON_ONCE(test_bit(RQ_USEDEFERRAL, &rqstp->rq_flags));
+
if (name == NULL || namelen == 0)
return nfserr_inval;

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 42a6b914c0fe..8dada7ef97cb 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2995,8 +2995,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
BUG_ON(cstate->replay_owner);
out:
cstate->status = status;
- /* Reset deferral mechanism for RPC deferrals */
- set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
return rpc_success;
}

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 4a403ce4fd46..5f046d5be4a6 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -6001,6 +6001,22 @@ nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
args->ops = args->iops;
args->rqstp = rqstp;

+ /*
+ * NFSv4 operation decoders can invoke svc cache lookups
+ * that trigger svc_defer() when RQ_USEDEFERRAL is set,
+ * setting RQ_DROPME. This creates two problems:
+ *
+ * 1. Non-idempotency: Compounds make it too hard to avoid
+ * problems if a request is deferred and replayed.
+ *
+ * 2. Session slot leakage (NFSv4.1+): If RQ_DROPME is set
+ * during decode but SEQUENCE executes successfully, the
+ * session slot will be marked INUSE. The request is then
+ * dropped before encoding, so the slot is never released,
+ * rendering it permanently unusable by the client.
+ */
+ clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
+
return nfsd4_decode_compound(args);
}

diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 8f71f5748c75..906a67257890 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -33,7 +33,7 @@ static __be32 nfsd_map_status(__be32 status)
break;
case nfserr_symlink:
case nfserr_wrong_type:
- status = nfserr_inval;
+ status = nfserr_io;
break;
}
return status;
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index 83f0072f0896..cd7aaeef45fe 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -930,7 +930,7 @@ static int ntfs_get_frame_pages(struct address_space *mapping, pgoff_t index,

folio = __filemap_get_folio(mapping, index,
FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
- gfp_mask);
+ gfp_mask | __GFP_ZERO);
if (IS_ERR(folio)) {
while (npages--) {
folio = page_folio(pages[npages]);
@@ -1012,8 +1012,12 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
goto out;

if (lcn == SPARSE_LCN) {
- ni->i_valid = valid =
- frame_vbo + ((u64)clen << sbi->cluster_bits);
+ valid = frame_vbo + ((u64)clen << sbi->cluster_bits);
+ if (ni->i_valid == valid) {
+ err = -EINVAL;
+ goto out;
+ }
+ ni->i_valid = valid;
continue;
}

diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c
index 38934e6978ec..28bd611f580d 100644
--- a/fs/ntfs3/fslog.c
+++ b/fs/ntfs3/fslog.c
@@ -3429,6 +3429,9 @@ static int do_action(struct ntfs_log *log, struct OPEN_ATTR_ENRTY *oe,

e1 = Add2Ptr(attr, le16_to_cpu(lrh->attr_off));
esize = le16_to_cpu(e1->size);
+ if (PtrOffset(e1, Add2Ptr(hdr, used)) < esize)
+ goto dirty_vol;
+
e2 = Add2Ptr(e1, esize);

memmove(e1, e2, PtrOffset(e2, Add2Ptr(hdr, used)));
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 1e9792cc557b..3c27e7a16f94 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -761,7 +761,7 @@ static bool ovl_fill_real(struct dir_context *ctx, const char *name,
struct dir_context *orig_ctx = rdt->orig_ctx;
bool res;

- if (rdt->parent_ino && strcmp(name, "..") == 0) {
+ if (rdt->parent_ino && namelen == 2 && !strncmp(name, "..", 2)) {
ino = rdt->parent_ino;
} else if (rdt->cache) {
struct ovl_cache_entry *p;
diff --git a/fs/pidfs.c b/fs/pidfs.c
index f4d7dac1b449..34987fcdd9a8 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -321,7 +321,7 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
* namespace hierarchy.
*/
if (!pid_in_current_pidns(pid))
- return -ESRCH;
+ return -EREMOTE;

attr = READ_ONCE(pid->attr);
if (mask & PIDFD_INFO_EXIT) {
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 2ae63189091e..038d4b57127f 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -529,7 +529,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
}

sid = task_session_nr_ns(task, ns);
- ppid = task_tgid_nr_ns(task->real_parent, ns);
+ ppid = task_ppid_nr_ns(task, ns);
pgid = task_pgrp_nr_ns(task, ns);

unlock_task_sighand(task, &flags);
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index f1848cdd6d34..c9eaacdec37e 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -298,6 +298,17 @@ void persistent_ram_save_old(struct persistent_ram_zone *prz)
if (!size)
return;

+ /*
+ * If the existing buffer is differently sized, free it so a new
+ * one is allocated. This can happen when persistent_ram_save_old()
+ * is called early in boot and later for a timer-triggered
+ * survivable crash when the crash dumps don't match in size
+ * (which would be extremely unlikely given kmsg buffers usually
+ * exceed prz buffer sizes).
+ */
+ if (prz->old_log && prz->old_log_size != size)
+ persistent_ram_free_old(prz);
+
if (!prz->old_log) {
persistent_ram_ecc_old(prz);
prz->old_log = kvzalloc(size, GFP_KERNEL);
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 7c2b75a44485..de4379a9c792 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -899,6 +899,7 @@ static struct super_block *quotactl_block(const char __user *special, int cmd)
sb_start_write(sb);
sb_end_write(sb);
put_super(sb);
+ cond_resched();
goto retry;
}
return sb;
diff --git a/fs/smb/client/smb2file.c b/fs/smb/client/smb2file.c
index 03f90553d831..e6cdf2efc7f4 100644
--- a/fs/smb/client/smb2file.c
+++ b/fs/smb/client/smb2file.c
@@ -178,6 +178,8 @@ int smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, __u32
&err_buftype);
if (rc == -EACCES && retry_without_read_attributes) {
free_rsp_buf(err_buftype, err_iov.iov_base);
+ memset(&err_iov, 0, sizeof(err_iov));
+ err_buftype = CIFS_NO_BUFFER;
oparms->desired_access &= ~FILE_READ_ATTRIBUTES;
rc = SMB2_open(xid, oparms, smb2_path, &smb2_oplock, smb2_data, NULL, &err_iov,
&err_buftype);
diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c
index 01d55bcc6d0f..c8cef098d480 100644
--- a/fs/smb/client/smbdirect.c
+++ b/fs/smb/client/smbdirect.c
@@ -101,8 +101,23 @@ int smbd_send_credit_target = 255;
/* The maximum single message size can be sent to remote peer */
int smbd_max_send_size = 1364;

-/* The maximum fragmented upper-layer payload receive size supported */
-int smbd_max_fragmented_recv_size = 1024 * 1024;
+/*
+ * The maximum fragmented upper-layer payload receive size supported
+ *
+ * Assume max_payload_per_credit is
+ * smbd_max_receive_size - 24 = 1340
+ *
+ * The maximum number would be
+ * smbd_receive_credit_max * max_payload_per_credit
+ *
+ * 1340 * 255 = 341700 (0x536C4)
+ *
+ * The minimum value from the spec is 131072 (0x20000)
+ *
+ * For now we use the logic we used in ksmbd before:
+ * (1364 * 255) / 2 = 173910 (0x2A756)
+ */
+int smbd_max_fragmented_recv_size = (1364 * 255) / 2;

/* The maximum single-message size which can be received */
int smbd_max_receive_size = 1364;
diff --git a/fs/tests/exec_kunit.c b/fs/tests/exec_kunit.c
index 7c77d039680b..f412d1a0f6bb 100644
--- a/fs/tests/exec_kunit.c
+++ b/fs/tests/exec_kunit.c
@@ -87,9 +87,6 @@ static const struct bprm_stack_limits_result bprm_stack_limits_results[] = {
.argc = 0, .envc = ARG_MAX / sizeof(void *) - 1 },
.expected_argmin = ULONG_MAX - sizeof(void *) },
/* Raising rlim_stack / 4 to _STK_LIM / 4 * 3 will see more space. */
- { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 4 * (_STK_LIM / 4 * 3),
- .argc = 0, .envc = 0 },
- .expected_argmin = ULONG_MAX - (_STK_LIM / 4 * 3) + sizeof(void *) },
{ { .p = ULONG_MAX, .rlim_stack.rlim_cur = 4 * (_STK_LIM / 4 * 3),
.argc = 0, .envc = 0 },
.expected_argmin = ULONG_MAX - (_STK_LIM / 4 * 3) + sizeof(void *) },
@@ -103,9 +100,6 @@ static const struct bprm_stack_limits_result bprm_stack_limits_results[] = {
{ { .p = ULONG_MAX, .rlim_stack.rlim_cur = 4 * _STK_LIM,
.argc = 0, .envc = 0 },
.expected_argmin = ULONG_MAX - (_STK_LIM / 4 * 3) + sizeof(void *) },
- { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 4 * _STK_LIM,
- .argc = 0, .envc = 0 },
- .expected_argmin = ULONG_MAX - (_STK_LIM / 4 * 3) + sizeof(void *) },
};

static void exec_test_bprm_stack_limits(struct kunit *test)
diff --git a/include/acpi/pcc.h b/include/acpi/pcc.h
index 9af3b502f839..840bfc95bae3 100644
--- a/include/acpi/pcc.h
+++ b/include/acpi/pcc.h
@@ -17,35 +17,6 @@ struct pcc_mbox_chan {
u32 latency;
u32 max_access_rate;
u16 min_turnaround_time;
-
- /* Set to true to indicate that the mailbox should manage
- * writing the dat to the shared buffer. This differs from
- * the case where the drivesr are writing to the buffer and
- * using send_data only to ring the doorbell. If this flag
- * is set, then the void * data parameter of send_data must
- * point to a kernel-memory buffer formatted in accordance with
- * the PCC specification.
- *
- * The active buffer management will include reading the
- * notify_on_completion flag, and will then
- * call mbox_chan_txdone when the acknowledgment interrupt is
- * received.
- */
- bool manage_writes;
-
- /* Optional callback that allows the driver
- * to allocate the memory used for receiving
- * messages. The return value is the location
- * inside the buffer where the mailbox should write the data.
- */
- void *(*rx_alloc)(struct mbox_client *cl, int size);
-};
-
-struct pcc_header {
- u32 signature;
- u32 flags;
- u32 length;
- u32 command;
};

/* Generic Communications Channel Shared Memory Region */
diff --git a/include/asm-generic/rqspinlock.h b/include/asm-generic/rqspinlock.h
index 0f2dcbbfee2f..5c5cf2f7fc39 100644
--- a/include/asm-generic/rqspinlock.h
+++ b/include/asm-generic/rqspinlock.h
@@ -191,7 +191,7 @@ static __always_inline int res_spin_lock(rqspinlock_t *lock)

#else

-#define res_spin_lock(lock) resilient_tas_spin_lock(lock)
+#define res_spin_lock(lock) ({ grab_held_lock_entry(lock); resilient_tas_spin_lock(lock); })

#endif /* CONFIG_QUEUED_SPINLOCKS */

diff --git a/include/drm/intel/intel_lb_mei_interface.h b/include/drm/intel/intel_lb_mei_interface.h
index d65be2cba2ab..0850738a30fc 100644
--- a/include/drm/intel/intel_lb_mei_interface.h
+++ b/include/drm/intel/intel_lb_mei_interface.h
@@ -53,7 +53,8 @@ enum intel_lb_status {
*/
struct intel_lb_component_ops {
/**
- * push_payload - Sends a payload to the authentication firmware
+ * @push_payload: Sends a payload to the authentication firmware
+ *
* @dev: Device struct corresponding to the mei device
* @type: Payload type (see &enum intel_lb_type)
* @flags: Payload flags bitmap (e.g. %INTEL_LB_FLAGS_IS_PERSISTENT)
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 536f8ee8da81..b8d8029c6c48 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -128,12 +128,6 @@ enum audit_nfcfgop {
extern int __init audit_register_class(int class, unsigned *list);
extern int audit_classify_syscall(int abi, unsigned syscall);
extern int audit_classify_arch(int arch);
-/* only for compat system calls */
-extern unsigned compat_write_class[];
-extern unsigned compat_read_class[];
-extern unsigned compat_dir_class[];
-extern unsigned compat_chattr_class[];
-extern unsigned compat_signal_class[];

/* audit_names->type values */
#define AUDIT_TYPE_UNKNOWN 0 /* we don't know yet */
diff --git a/include/linux/audit_arch.h b/include/linux/audit_arch.h
index 0e34d673ef17..2b8153791e6a 100644
--- a/include/linux/audit_arch.h
+++ b/include/linux/audit_arch.h
@@ -23,4 +23,11 @@ enum auditsc_class_t {

extern int audit_classify_compat_syscall(int abi, unsigned syscall);

+/* only for compat system calls */
+extern unsigned compat_write_class[];
+extern unsigned compat_read_class[];
+extern unsigned compat_dir_class[];
+extern unsigned compat_chattr_class[];
+extern unsigned compat_signal_class[];
+
#endif
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index d808253f2e94..e2dd3a6d495a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -3200,6 +3200,11 @@ static inline void bpf_prog_report_arena_violation(bool write, unsigned long add
}
#endif /* CONFIG_BPF_SYSCALL */

+static inline bool bpf_net_capable(void)
+{
+ return capable(CAP_NET_ADMIN) || capable(CAP_SYS_ADMIN);
+}
+
static __always_inline int
bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
{
diff --git a/include/linux/bpf_mprog.h b/include/linux/bpf_mprog.h
index 929225f7b095..0b9f4caeeb0a 100644
--- a/include/linux/bpf_mprog.h
+++ b/include/linux/bpf_mprog.h
@@ -340,4 +340,14 @@ static inline bool bpf_mprog_supported(enum bpf_prog_type type)
return false;
}
}
+
+static inline bool bpf_mprog_detach_empty(enum bpf_prog_type type)
+{
+ switch (type) {
+ case BPF_PROG_TYPE_SCHED_CLS:
+ return bpf_net_capable();
+ default:
+ return false;
+ }
+}
#endif /* __BPF_MPROG_H */
diff --git a/include/linux/capability.h b/include/linux/capability.h
index 1fb08922552c..37db92b3d6f8 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -203,6 +203,12 @@ static inline bool checkpoint_restore_ns_capable(struct user_namespace *ns)
ns_capable(ns, CAP_SYS_ADMIN);
}

+static inline bool checkpoint_restore_ns_capable_noaudit(struct user_namespace *ns)
+{
+ return ns_capable_noaudit(ns, CAP_CHECKPOINT_RESTORE) ||
+ ns_capable_noaudit(ns, CAP_SYS_ADMIN);
+}
+
/* audit system wants to get cap info from files as well */
int get_vfs_caps_from_disk(struct mnt_idmap *idmap,
const struct dentry *dentry,
diff --git a/include/linux/clk.h b/include/linux/clk.h
index b607482ca77e..64ff118ffb1a 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -228,6 +228,23 @@ int devm_clk_rate_exclusive_get(struct device *dev, struct clk *clk);
*/
void clk_rate_exclusive_put(struct clk *clk);

+/**
+ * clk_save_context - save clock context for poweroff
+ *
+ * Saves the context of the clock register for powerstates in which the
+ * contents of the registers will be lost. Occurs deep within the suspend
+ * code so locking is not necessary.
+ */
+int clk_save_context(void);
+
+/**
+ * clk_restore_context - restore clock context after poweroff
+ *
+ * This occurs with all clocks enabled. Occurs deep within the resume code
+ * so locking is not necessary.
+ */
+void clk_restore_context(void);
+
#else

static inline int clk_notifier_register(struct clk *clk,
@@ -293,6 +310,13 @@ static inline int devm_clk_rate_exclusive_get(struct device *dev, struct clk *cl

static inline void clk_rate_exclusive_put(struct clk *clk) {}

+static inline int clk_save_context(void)
+{
+ return 0;
+}
+
+static inline void clk_restore_context(void) {}
+
#endif

#ifdef CONFIG_HAVE_CLK_PREPARE
@@ -933,23 +957,6 @@ struct clk *clk_get_parent(struct clk *clk);
*/
struct clk *clk_get_sys(const char *dev_id, const char *con_id);

-/**
- * clk_save_context - save clock context for poweroff
- *
- * Saves the context of the clock register for powerstates in which the
- * contents of the registers will be lost. Occurs deep within the suspend
- * code so locking is not necessary.
- */
-int clk_save_context(void);
-
-/**
- * clk_restore_context - restore clock context after poweroff
- *
- * This occurs with all clocks enabled. Occurs deep within the resume code
- * so locking is not necessary.
- */
-void clk_restore_context(void);
-
#else /* !CONFIG_HAVE_CLK */

static inline struct clk *clk_get(struct device *dev, const char *id)
@@ -1129,13 +1136,6 @@ static inline struct clk *clk_get_sys(const char *dev_id, const char *con_id)
return NULL;
}

-static inline int clk_save_context(void)
-{
- return 0;
-}
-
-static inline void clk_restore_context(void) {}
-
#endif

/* clk_prepare_enable helps cases using clk_enable in non-atomic context. */
diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h
index 0864773a57e8..822085bc2d20 100644
--- a/include/linux/device_cgroup.h
+++ b/include/linux/device_cgroup.h
@@ -21,7 +21,7 @@ static inline int devcgroup_inode_permission(struct inode *inode, int mask)
if (likely(!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode)))
return 0;

- if (likely(!inode->i_rdev))
+ if (!inode->i_rdev)
return 0;

if (S_ISBLK(inode->i_mode))
diff --git a/include/linux/dpll.h b/include/linux/dpll.h
index 25be745bf41f..562f520b23c2 100644
--- a/include/linux/dpll.h
+++ b/include/linux/dpll.h
@@ -163,6 +163,7 @@ struct dpll_pin_properties {
u32 freq_supported_num;
struct dpll_pin_frequency *freq_supported;
struct dpll_pin_phase_adjust_range phase_range;
+ u32 phase_gran;
};

#if IS_ENABLED(CONFIG_DPLL)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 569de3b14279..cf7a0bce1bb6 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1375,24 +1375,13 @@ static inline bool bpf_jit_kallsyms_enabled(void)
return false;
}

-int __bpf_address_lookup(unsigned long addr, unsigned long *size,
- unsigned long *off, char *sym);
+int bpf_address_lookup(unsigned long addr, unsigned long *size,
+ unsigned long *off, char *sym);
bool is_bpf_text_address(unsigned long addr);
int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
char *sym);
struct bpf_prog *bpf_prog_ksym_find(unsigned long addr);

-static inline int
-bpf_address_lookup(unsigned long addr, unsigned long *size,
- unsigned long *off, char **modname, char *sym)
-{
- int ret = __bpf_address_lookup(addr, size, off, sym);
-
- if (ret && modname)
- *modname = NULL;
- return ret;
-}
-
void bpf_prog_kallsyms_add(struct bpf_prog *fp);
void bpf_prog_kallsyms_del(struct bpf_prog *fp);

@@ -1431,8 +1420,8 @@ static inline bool bpf_jit_kallsyms_enabled(void)
}

static inline int
-__bpf_address_lookup(unsigned long addr, unsigned long *size,
- unsigned long *off, char *sym)
+bpf_address_lookup(unsigned long addr, unsigned long *size,
+ unsigned long *off, char *sym)
{
return 0;
}
@@ -1453,13 +1442,6 @@ static inline struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
return NULL;
}

-static inline int
-bpf_address_lookup(unsigned long addr, unsigned long *size,
- unsigned long *off, char **modname, char *sym)
-{
- return 0;
-}
-
static inline void bpf_prog_kallsyms_add(struct bpf_prog *fp)
{
}
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 07f8c309e432..9cc60e2506af 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -87,11 +87,13 @@ struct ftrace_hash;
defined(CONFIG_DYNAMIC_FTRACE)
int
ftrace_mod_address_lookup(unsigned long addr, unsigned long *size,
- unsigned long *off, char **modname, char *sym);
+ unsigned long *off, char **modname,
+ const unsigned char **modbuildid, char *sym);
#else
static inline int
ftrace_mod_address_lookup(unsigned long addr, unsigned long *size,
- unsigned long *off, char **modname, char *sym)
+ unsigned long *off, char **modname,
+ const unsigned char **modbuildid, char *sym)
{
return 0;
}
diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h
index c4690e365ade..4cf418a41fe4 100644
--- a/include/linux/hisi_acc_qm.h
+++ b/include/linux/hisi_acc_qm.h
@@ -444,12 +444,16 @@ struct hisi_qp_ops {
int (*fill_sqe)(void *sqe, void *q_parm, void *d_parm);
};

+struct instance_backlog {
+ struct list_head list;
+ spinlock_t lock;
+};
+
struct hisi_qp {
u32 qp_id;
u16 sq_depth;
u16 cq_depth;
u8 alg_type;
- u8 req_type;

struct qm_dma qdma;
void *sqe;
@@ -459,7 +463,6 @@ struct hisi_qp {

struct hisi_qp_status qp_status;
struct hisi_qp_ops *hw_ops;
- void *qp_ctx;
void (*req_cb)(struct hisi_qp *qp, void *data);
void (*event_cb)(struct hisi_qp *qp);

@@ -468,6 +471,10 @@ struct hisi_qp {
bool is_in_kernel;
u16 pasid;
struct uacce_queue *uacce_q;
+
+ spinlock_t qp_lock;
+ struct instance_backlog backlog;
+ const void **msg;
};

static inline int vfs_num_set(const char *val, const struct kernel_param *kp)
@@ -572,7 +579,7 @@ struct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev,
void hisi_acc_free_sgl_pool(struct device *dev,
struct hisi_acc_sgl_pool *pool);
int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num,
- u8 alg_type, int node, struct hisi_qp **qps);
+ u8 *alg_type, int node, struct hisi_qp **qps);
void hisi_qm_free_qps(struct hisi_qp **qps, int qp_num);
void hisi_qm_dev_shutdown(struct pci_dev *pdev);
void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list);
diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
index b424555753b1..b77bc55a4cf3 100644
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h
@@ -15,6 +15,7 @@
#include <linux/completion.h>
#include <linux/kref.h>
#include <linux/types.h>
+#include <linux/workqueue_types.h>

/**
* struct hwrng - Hardware Random Number Generator driver
@@ -48,6 +49,7 @@ struct hwrng {
/* internal. */
struct list_head list;
struct kref ref;
+ struct work_struct cleanup_work;
struct completion cleanup_done;
struct completion dying;
};
diff --git a/include/linux/input/adp5589.h b/include/linux/input/adp5589.h
deleted file mode 100644
index 0e4742c8c81e..000000000000
--- a/include/linux/input/adp5589.h
+++ /dev/null
@@ -1,180 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Analog Devices ADP5589/ADP5585 I/O Expander and QWERTY Keypad Controller
- *
- * Copyright 2010-2011 Analog Devices Inc.
- */
-
-#ifndef _ADP5589_H
-#define _ADP5589_H
-
-/*
- * ADP5589 specific GPI and Keymap defines
- */
-
-#define ADP5589_KEYMAPSIZE 88
-
-#define ADP5589_GPI_PIN_ROW0 97
-#define ADP5589_GPI_PIN_ROW1 98
-#define ADP5589_GPI_PIN_ROW2 99
-#define ADP5589_GPI_PIN_ROW3 100
-#define ADP5589_GPI_PIN_ROW4 101
-#define ADP5589_GPI_PIN_ROW5 102
-#define ADP5589_GPI_PIN_ROW6 103
-#define ADP5589_GPI_PIN_ROW7 104
-#define ADP5589_GPI_PIN_COL0 105
-#define ADP5589_GPI_PIN_COL1 106
-#define ADP5589_GPI_PIN_COL2 107
-#define ADP5589_GPI_PIN_COL3 108
-#define ADP5589_GPI_PIN_COL4 109
-#define ADP5589_GPI_PIN_COL5 110
-#define ADP5589_GPI_PIN_COL6 111
-#define ADP5589_GPI_PIN_COL7 112
-#define ADP5589_GPI_PIN_COL8 113
-#define ADP5589_GPI_PIN_COL9 114
-#define ADP5589_GPI_PIN_COL10 115
-#define GPI_LOGIC1 116
-#define GPI_LOGIC2 117
-
-#define ADP5589_GPI_PIN_ROW_BASE ADP5589_GPI_PIN_ROW0
-#define ADP5589_GPI_PIN_ROW_END ADP5589_GPI_PIN_ROW7
-#define ADP5589_GPI_PIN_COL_BASE ADP5589_GPI_PIN_COL0
-#define ADP5589_GPI_PIN_COL_END ADP5589_GPI_PIN_COL10
-
-#define ADP5589_GPI_PIN_BASE ADP5589_GPI_PIN_ROW_BASE
-#define ADP5589_GPI_PIN_END ADP5589_GPI_PIN_COL_END
-
-#define ADP5589_GPIMAPSIZE_MAX (ADP5589_GPI_PIN_END - ADP5589_GPI_PIN_BASE + 1)
-
-/*
- * ADP5585 specific GPI and Keymap defines
- */
-
-#define ADP5585_KEYMAPSIZE 30
-
-#define ADP5585_GPI_PIN_ROW0 37
-#define ADP5585_GPI_PIN_ROW1 38
-#define ADP5585_GPI_PIN_ROW2 39
-#define ADP5585_GPI_PIN_ROW3 40
-#define ADP5585_GPI_PIN_ROW4 41
-#define ADP5585_GPI_PIN_ROW5 42
-#define ADP5585_GPI_PIN_COL0 43
-#define ADP5585_GPI_PIN_COL1 44
-#define ADP5585_GPI_PIN_COL2 45
-#define ADP5585_GPI_PIN_COL3 46
-#define ADP5585_GPI_PIN_COL4 47
-#define GPI_LOGIC 48
-
-#define ADP5585_GPI_PIN_ROW_BASE ADP5585_GPI_PIN_ROW0
-#define ADP5585_GPI_PIN_ROW_END ADP5585_GPI_PIN_ROW5
-#define ADP5585_GPI_PIN_COL_BASE ADP5585_GPI_PIN_COL0
-#define ADP5585_GPI_PIN_COL_END ADP5585_GPI_PIN_COL4
-
-#define ADP5585_GPI_PIN_BASE ADP5585_GPI_PIN_ROW_BASE
-#define ADP5585_GPI_PIN_END ADP5585_GPI_PIN_COL_END
-
-#define ADP5585_GPIMAPSIZE_MAX (ADP5585_GPI_PIN_END - ADP5585_GPI_PIN_BASE + 1)
-
-struct adp5589_gpi_map {
- unsigned short pin;
- unsigned short sw_evt;
-};
-
-/* scan_cycle_time */
-#define ADP5589_SCAN_CYCLE_10ms 0
-#define ADP5589_SCAN_CYCLE_20ms 1
-#define ADP5589_SCAN_CYCLE_30ms 2
-#define ADP5589_SCAN_CYCLE_40ms 3
-
-/* RESET_CFG */
-#define RESET_PULSE_WIDTH_500us 0
-#define RESET_PULSE_WIDTH_1ms 1
-#define RESET_PULSE_WIDTH_2ms 2
-#define RESET_PULSE_WIDTH_10ms 3
-
-#define RESET_TRIG_TIME_0ms (0 << 2)
-#define RESET_TRIG_TIME_1000ms (1 << 2)
-#define RESET_TRIG_TIME_1500ms (2 << 2)
-#define RESET_TRIG_TIME_2000ms (3 << 2)
-#define RESET_TRIG_TIME_2500ms (4 << 2)
-#define RESET_TRIG_TIME_3000ms (5 << 2)
-#define RESET_TRIG_TIME_3500ms (6 << 2)
-#define RESET_TRIG_TIME_4000ms (7 << 2)
-
-#define RESET_PASSTHRU_EN (1 << 5)
-#define RESET1_POL_HIGH (1 << 6)
-#define RESET1_POL_LOW (0 << 6)
-#define RESET2_POL_HIGH (1 << 7)
-#define RESET2_POL_LOW (0 << 7)
-
-/* ADP5589 Mask Bits:
- * C C C C C C C C C C C | R R R R R R R R
- * 1 9 8 7 6 5 4 3 2 1 0 | 7 6 5 4 3 2 1 0
- * 0
- * ---------------- BIT ------------------
- * 1 1 1 1 1 1 1 1 1 0 0 | 0 0 0 0 0 0 0 0
- * 8 7 6 5 4 3 2 1 0 9 8 | 7 6 5 4 3 2 1 0
- */
-
-#define ADP_ROW(x) (1 << (x))
-#define ADP_COL(x) (1 << (x + 8))
-#define ADP5589_ROW_MASK 0xFF
-#define ADP5589_COL_MASK 0xFF
-#define ADP5589_COL_SHIFT 8
-#define ADP5589_MAX_ROW_NUM 7
-#define ADP5589_MAX_COL_NUM 10
-
-/* ADP5585 Mask Bits:
- * C C C C C | R R R R R R
- * 4 3 2 1 0 | 5 4 3 2 1 0
- *
- * ---- BIT -- -----------
- * 1 0 0 0 0 | 0 0 0 0 0 0
- * 0 9 8 7 6 | 5 4 3 2 1 0
- */
-
-#define ADP5585_ROW_MASK 0x3F
-#define ADP5585_COL_MASK 0x1F
-#define ADP5585_ROW_SHIFT 0
-#define ADP5585_COL_SHIFT 6
-#define ADP5585_MAX_ROW_NUM 5
-#define ADP5585_MAX_COL_NUM 4
-
-#define ADP5585_ROW(x) (1 << ((x) & ADP5585_ROW_MASK))
-#define ADP5585_COL(x) (1 << (((x) & ADP5585_COL_MASK) + ADP5585_COL_SHIFT))
-
-/* Put one of these structures in i2c_board_info platform_data */
-
-struct adp5589_kpad_platform_data {
- unsigned keypad_en_mask; /* Keypad (Rows/Columns) enable mask */
- const unsigned short *keymap; /* Pointer to keymap */
- unsigned short keymapsize; /* Keymap size */
- bool repeat; /* Enable key repeat */
- bool en_keylock; /* Enable key lock feature (ADP5589 only)*/
- unsigned char unlock_key1; /* Unlock Key 1 (ADP5589 only) */
- unsigned char unlock_key2; /* Unlock Key 2 (ADP5589 only) */
- unsigned char unlock_timer; /* Time in seconds [0..7] between the two unlock keys 0=disable (ADP5589 only) */
- unsigned char scan_cycle_time; /* Time between consecutive scan cycles */
- unsigned char reset_cfg; /* Reset config */
- unsigned short reset1_key_1; /* Reset Key 1 */
- unsigned short reset1_key_2; /* Reset Key 2 */
- unsigned short reset1_key_3; /* Reset Key 3 */
- unsigned short reset2_key_1; /* Reset Key 1 */
- unsigned short reset2_key_2; /* Reset Key 2 */
- unsigned debounce_dis_mask; /* Disable debounce mask */
- unsigned pull_dis_mask; /* Disable all pull resistors mask */
- unsigned pullup_en_100k; /* Pull-Up 100k Enable Mask */
- unsigned pullup_en_300k; /* Pull-Up 300k Enable Mask */
- unsigned pulldown_en_300k; /* Pull-Down 300k Enable Mask */
- const struct adp5589_gpi_map *gpimap;
- unsigned short gpimapsize;
- const struct adp5589_gpio_platform_data *gpio_data;
-};
-
-struct i2c_client; /* forward declaration */
-
-struct adp5589_gpio_platform_data {
- int gpio_start; /* GPIO Chip base # */
-};
-
-#endif
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 51b6484c0493..8f1166bc3b1c 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -215,7 +215,7 @@ static inline int __must_check
devm_request_irq(struct device *dev, unsigned int irq, irq_handler_t handler,
unsigned long irqflags, const char *devname, void *dev_id)
{
- return devm_request_threaded_irq(dev, irq, handler, NULL, irqflags,
+ return devm_request_threaded_irq(dev, irq, handler, NULL, irqflags | IRQF_COND_ONESHOT,
devname, dev_id);
}

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index c2ea6280901d..b4d8aca3e786 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -439,6 +439,9 @@ struct io_ring_ctx {
struct list_head defer_list;
unsigned nr_drained;

+ /* protected by ->completion_lock */
+ unsigned nr_req_allocated;
+
#ifdef CONFIG_NET_RX_BUSY_POLL
struct list_head napi_list; /* track busy poll napi_id */
spinlock_t napi_lock; /* napi_list lock */
@@ -451,10 +454,6 @@ struct io_ring_ctx {
DECLARE_HASHTABLE(napi_ht, 4);
#endif

- /* protected by ->completion_lock */
- unsigned evfd_last_cq_tail;
- unsigned nr_req_allocated;
-
/*
* Protection for resize vs mmap races - both the mmap and resize
* side will need to grab this lock, to prevent either side from
diff --git a/include/linux/leds-expresswire.h b/include/linux/leds-expresswire.h
index a422921f4159..7f8c4795f69f 100644
--- a/include/linux/leds-expresswire.h
+++ b/include/linux/leds-expresswire.h
@@ -30,9 +30,6 @@ struct expresswire_common_props {

void expresswire_power_off(struct expresswire_common_props *props);
void expresswire_enable(struct expresswire_common_props *props);
-void expresswire_start(struct expresswire_common_props *props);
-void expresswire_end(struct expresswire_common_props *props);
-void expresswire_set_bit(struct expresswire_common_props *props, bool bit);
void expresswire_write_u8(struct expresswire_common_props *props, u8 val);

#endif /* _LEDS_EXPRESSWIRE_H */
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 7a98de1cc995..3b8bdea8516d 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -899,6 +899,9 @@ struct ata_port {
u64 qc_active;
int nr_active_links; /* #links with active qcs */

+ struct work_struct deferred_qc_work;
+ struct ata_queued_cmd *deferred_qc;
+
struct ata_link link; /* host default link */
struct ata_link *slave_link; /* see ata_slave_link_init() */

diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h
index 5f70d3b5d1b1..097ef4dfcdac 100644
--- a/include/linux/mfd/wm8350/core.h
+++ b/include/linux/mfd/wm8350/core.h
@@ -667,7 +667,7 @@ static inline int wm8350_register_irq(struct wm8350 *wm8350, int irq,
return -ENODEV;

return request_threaded_irq(irq + wm8350->irq_base, NULL,
- handler, flags, name, data);
+ handler, flags | IRQF_ONESHOT, name, data);
}

static inline void wm8350_free_irq(struct wm8350 *wm8350, int irq, void *data)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 5405ca1038f9..85c2b3d358ec 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1274,12 +1274,12 @@ static inline bool mlx5_rl_is_supported(struct mlx5_core_dev *dev)
static inline int mlx5_core_is_mp_slave(struct mlx5_core_dev *dev)
{
return MLX5_CAP_GEN(dev, affiliate_nic_vport_criteria) &&
- MLX5_CAP_GEN(dev, num_vhca_ports) <= 1;
+ MLX5_CAP_GEN_MAX(dev, num_vhca_ports) <= 1;
}

static inline int mlx5_core_is_mp_master(struct mlx5_core_dev *dev)
{
- return MLX5_CAP_GEN(dev, num_vhca_ports) > 1;
+ return MLX5_CAP_GEN_MAX(dev, num_vhca_ports) > 1;
}

static inline int mlx5_core_mp_enabled(struct mlx5_core_dev *dev)
diff --git a/include/linux/module.h b/include/linux/module.h
index e135cc79acee..4decae2b1675 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -747,6 +747,15 @@ static inline void __module_get(struct module *module)
__mod ? __mod->name : "kernel"; \
})

+static inline const unsigned char *module_buildid(struct module *mod)
+{
+#ifdef CONFIG_STACKTRACE_BUILD_ID
+ return mod->build_id;
+#else
+ return NULL;
+#endif
+}
+
/* Dereference module function descriptor */
void *dereference_module_function_descriptor(struct module *mod, void *ptr);

diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index 927c10d78769..1c741145e497 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -287,7 +287,7 @@ struct spinand_device;

/**
* struct spinand_id - SPI NAND id structure
- * @data: buffer containing the id bytes. Currently 4 bytes large, but can
+ * @data: buffer containing the id bytes. Currently 5 bytes large, but can
* be extended if required
* @len: ID length
*/
diff --git a/include/linux/psp.h b/include/linux/psp.h
index 92e60aeef21e..b337dcce1e99 100644
--- a/include/linux/psp.h
+++ b/include/linux/psp.h
@@ -18,6 +18,7 @@
* and should include an appropriate local definition in their source file.
*/
#define PSP_CMDRESP_STS GENMASK(15, 0)
+#define PSP_TEE_STS_RING_BUSY 0x0000000d /* Ring already initialized */
#define PSP_CMDRESP_CMD GENMASK(23, 16)
#define PSP_CMDRESP_RESERVED GENMASK(29, 24)
#define PSP_CMDRESP_RECOVERY BIT(30)
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 49847888c287..829b281d6c9c 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -97,6 +97,8 @@ struct sk_psock {
struct sk_buff_head ingress_skb;
struct list_head ingress_msg;
spinlock_t ingress_lock;
+ /** @msg_tot_len: Total bytes queued in ingress_msg list. */
+ u32 msg_tot_len;
unsigned long state;
struct list_head link;
spinlock_t link_lock;
@@ -141,6 +143,8 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
struct sk_msg *msg, u32 bytes);
int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
int len, int flags);
+int __sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
+ int len, int flags, int *copied_from_self);
bool sk_msg_is_readable(struct sock *sk);

static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes)
@@ -319,6 +323,27 @@ static inline void sock_drop(struct sock *sk, struct sk_buff *skb)
kfree_skb(skb);
}

+static inline u32 sk_psock_get_msg_len_nolock(struct sk_psock *psock)
+{
+ /* Used by ioctl to read msg_tot_len only; lock-free for performance */
+ return READ_ONCE(psock->msg_tot_len);
+}
+
+static inline void sk_psock_msg_len_add_locked(struct sk_psock *psock, int diff)
+{
+ /* Use WRITE_ONCE to ensure correct read in sk_psock_get_msg_len_nolock().
+ * ingress_lock should be held to prevent concurrent updates to msg_tot_len
+ */
+ WRITE_ONCE(psock->msg_tot_len, psock->msg_tot_len + diff);
+}
+
+static inline void sk_psock_msg_len_add(struct sk_psock *psock, int diff)
+{
+ spin_lock_bh(&psock->ingress_lock);
+ sk_psock_msg_len_add_locked(psock, diff);
+ spin_unlock_bh(&psock->ingress_lock);
+}
+
static inline bool sk_psock_queue_msg(struct sk_psock *psock,
struct sk_msg *msg)
{
@@ -327,6 +352,7 @@ static inline bool sk_psock_queue_msg(struct sk_psock *psock,
spin_lock_bh(&psock->ingress_lock);
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
list_add_tail(&msg->list, &psock->ingress_msg);
+ sk_psock_msg_len_add_locked(psock, msg->sg.size);
ret = true;
} else {
sk_msg_free(psock->sk, msg);
@@ -343,18 +369,25 @@ static inline struct sk_msg *sk_psock_dequeue_msg(struct sk_psock *psock)

spin_lock_bh(&psock->ingress_lock);
msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list);
- if (msg)
+ if (msg) {
list_del(&msg->list);
+ sk_psock_msg_len_add_locked(psock, -msg->sg.size);
+ }
spin_unlock_bh(&psock->ingress_lock);
return msg;
}

+static inline struct sk_msg *sk_psock_peek_msg_locked(struct sk_psock *psock)
+{
+ return list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list);
+}
+
static inline struct sk_msg *sk_psock_peek_msg(struct sk_psock *psock)
{
struct sk_msg *msg;

spin_lock_bh(&psock->ingress_lock);
- msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list);
+ msg = sk_psock_peek_msg_locked(psock);
spin_unlock_bh(&psock->ingress_lock);
return msg;
}
@@ -521,6 +554,39 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
return !!psock->saved_data_ready;
}

+/* for tcp only, sk is locked */
+static inline ssize_t sk_psock_msg_inq(struct sock *sk)
+{
+ struct sk_psock *psock;
+ ssize_t inq = 0;
+
+ psock = sk_psock_get(sk);
+ if (likely(psock)) {
+ inq = sk_psock_get_msg_len_nolock(psock);
+ sk_psock_put(sk, psock);
+ }
+ return inq;
+}
+
+/* for udp only, sk is not locked */
+static inline ssize_t sk_msg_first_len(struct sock *sk)
+{
+ struct sk_psock *psock;
+ struct sk_msg *msg;
+ ssize_t inq = 0;
+
+ psock = sk_psock_get(sk);
+ if (likely(psock)) {
+ spin_lock_bh(&psock->ingress_lock);
+ msg = sk_psock_peek_msg_locked(psock);
+ if (msg)
+ inq = msg->sg.size;
+ spin_unlock_bh(&psock->ingress_lock);
+ sk_psock_put(sk, psock);
+ }
+ return inq;
+}
+
#if IS_ENABLED(CONFIG_NET_SOCK_MSG)

#define BPF_F_STRPARSER (1UL << 1)
diff --git a/include/linux/soc/qcom/ubwc.h b/include/linux/soc/qcom/ubwc.h
index 1ed8b1b16bc9..d9dfc9edc1b2 100644
--- a/include/linux/soc/qcom/ubwc.h
+++ b/include/linux/soc/qcom/ubwc.h
@@ -8,6 +8,7 @@
#define __QCOM_UBWC_H__

#include <linux/bits.h>
+#include <linux/printk.h>
#include <linux/types.h>

struct qcom_ubwc_cfg_data {
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index fa1318bac06c..151c81c560c8 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -171,7 +171,13 @@ struct dwmac4_addrs {
u32 mtl_low_cred_offset;
};

-#define STMMAC_FLAG_HAS_INTEGRATED_PCS BIT(0)
+enum dwmac_core_type {
+ DWMAC_CORE_MAC100,
+ DWMAC_CORE_GMAC,
+ DWMAC_CORE_GMAC4,
+ DWMAC_CORE_XGMAC,
+};
+
#define STMMAC_FLAG_SPH_DISABLE BIT(1)
#define STMMAC_FLAG_USE_PHY_WOL BIT(2)
#define STMMAC_FLAG_HAS_SUN8I BIT(3)
@@ -187,6 +193,7 @@ struct dwmac4_addrs {
#define STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY BIT(13)

struct plat_stmmacenet_data {
+ enum dwmac_core_type core_type;
int bus_id;
int phy_addr;
/* MAC ----- optional PCS ----- SerDes ----- optional PHY ----- Media
@@ -220,7 +227,6 @@ struct plat_stmmacenet_data {
struct stmmac_dma_cfg *dma_cfg;
struct stmmac_safety_feature_cfg *safety_feat_cfg;
int clk_csr;
- int has_gmac;
int enh_desc;
int tx_coe;
int rx_coe;
@@ -283,10 +289,8 @@ struct plat_stmmacenet_data {
struct reset_control *stmmac_rst;
struct reset_control *stmmac_ahb_rst;
struct stmmac_axi *axi;
- int has_gmac4;
int rss_en;
int mac_port_sel_speed;
- int has_xgmac;
u8 vlan_fail_q;
struct pci_dev *pdev;
int int_snapshot_num;
diff --git a/include/linux/sunrpc/xdrgen/_builtins.h b/include/linux/sunrpc/xdrgen/_builtins.h
index 66ca3ece951a..a5ab75d2db04 100644
--- a/include/linux/sunrpc/xdrgen/_builtins.h
+++ b/include/linux/sunrpc/xdrgen/_builtins.h
@@ -188,12 +188,10 @@ xdrgen_decode_string(struct xdr_stream *xdr, string *ptr, u32 maxlen)
return false;
if (unlikely(maxlen && len > maxlen))
return false;
- if (len != 0) {
- p = xdr_inline_decode(xdr, len);
- if (unlikely(!p))
- return false;
- ptr->data = (unsigned char *)p;
- }
+ p = xdr_inline_decode(xdr, len);
+ if (unlikely(!p))
+ return false;
+ ptr->data = (unsigned char *)p;
ptr->len = len;
return true;
}
@@ -219,12 +217,10 @@ xdrgen_decode_opaque(struct xdr_stream *xdr, opaque *ptr, u32 maxlen)
return false;
if (unlikely(maxlen && len > maxlen))
return false;
- if (len != 0) {
- p = xdr_inline_decode(xdr, len);
- if (unlikely(!p))
- return false;
- ptr->data = (u8 *)p;
- }
+ p = xdr_inline_decode(xdr, len);
+ if (unlikely(!p))
+ return false;
+ ptr->data = (u8 *)p;
ptr->len = len;
return true;
}
diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h
index 457879938fc1..3366090a86bd 100644
--- a/include/linux/u64_stats_sync.h
+++ b/include/linux/u64_stats_sync.h
@@ -89,6 +89,11 @@ static inline void u64_stats_add(u64_stats_t *p, unsigned long val)
local64_add(val, &p->v);
}

+static inline void u64_stats_sub(u64_stats_t *p, s64 val)
+{
+ local64_sub(val, &p->v);
+}
+
static inline void u64_stats_inc(u64_stats_t *p)
{
local64_inc(&p->v);
@@ -130,6 +135,11 @@ static inline void u64_stats_add(u64_stats_t *p, unsigned long val)
p->v += val;
}

+static inline void u64_stats_sub(u64_stats_t *p, s64 val)
+{
+ p->v -= val;
+}
+
static inline void u64_stats_inc(u64_stats_t *p)
{
p->v++;
diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h
index ea32393464a2..827b87a95dab 100644
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h
@@ -51,11 +51,25 @@ static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner)
return outer;
}

+/* Apply either ECT(0) or ECT(1) */
+static inline void __INET_ECN_xmit(struct sock *sk, bool use_ect_1)
+{
+ __u8 ect = use_ect_1 ? INET_ECN_ECT_1 : INET_ECN_ECT_0;
+
+ /* Mask the complete byte in case the connection alternates between
+ * ECT(0) and ECT(1).
+ */
+ inet_sk(sk)->tos &= ~INET_ECN_MASK;
+ inet_sk(sk)->tos |= ect;
+ if (inet6_sk(sk)) {
+ inet6_sk(sk)->tclass &= ~INET_ECN_MASK;
+ inet6_sk(sk)->tclass |= ect;
+ }
+}
+
static inline void INET_ECN_xmit(struct sock *sk)
{
- inet_sk(sk)->tos |= INET_ECN_ECT_0;
- if (inet6_sk(sk) != NULL)
- inet6_sk(sk)->tclass |= INET_ECN_ECT_0;
+ __INET_ECN_xmit(sk, false);
}

static inline void INET_ECN_dontxmit(struct sock *sk)
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 2ccdf85f34f1..f0936df7567e 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1280,12 +1280,15 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex,

static inline int ip6_sock_set_v6only(struct sock *sk)
{
- if (inet_sk(sk)->inet_num)
- return -EINVAL;
+ int ret = 0;
+
lock_sock(sk);
- sk->sk_ipv6only = true;
+ if (inet_sk(sk)->inet_num)
+ ret = -EINVAL;
+ else
+ sk->sk_ipv6only = true;
release_sock(sk);
- return 0;
+ return ret;
}

static inline void ip6_sock_set_recverr(struct sock *sk)
diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h
index 52a06de41aa0..cf0166520cf3 100644
--- a/include/net/netfilter/nf_conntrack_count.h
+++ b/include/net/netfilter/nf_conntrack_count.h
@@ -13,6 +13,7 @@ struct nf_conncount_list {
u32 last_gc; /* jiffies at most recent gc */
struct list_head head; /* connections with the same filtering key */
unsigned int count; /* length of list */
+ unsigned int last_gc_count; /* length of list at most recent gc */
};

struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen);
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 4aeffddb7586..45eb26b2e95b 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -6,11 +6,13 @@
#include <linux/ipv6.h>
#include <linux/jhash.h>
#include <linux/netfilter.h>
+#include <linux/rhashtable-types.h>
#include <linux/skbuff.h>

/* Each queued (to userspace) skbuff has one of these. */
struct nf_queue_entry {
struct list_head list;
+ struct rhash_head hash_node;
struct sk_buff *skb;
unsigned int id;
unsigned int hook_index; /* index in hook_entries->hook[] */
@@ -19,7 +21,9 @@ struct nf_queue_entry {
struct net_device *physout;
#endif
struct nf_hook_state state;
+ bool nf_ct_is_unconfirmed;
u16 size; /* sizeof(entry) + saved route keys */
+ u16 queue_num;

/* extra space to store route keys */
};
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 0e266c2d0e7f..7eac73f9b4ce 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -278,6 +278,8 @@ struct nft_userdata {
unsigned char data[];
};

+#define NFT_SET_ELEM_INTERNAL_LAST 0x1
+
/* placeholder structure for opaque set element backend representation. */
struct nft_elem_priv { };

@@ -287,6 +289,7 @@ struct nft_elem_priv { };
* @key: element key
* @key_end: closing element key
* @data: element data
+ * @flags: flags
* @priv: element private data and extensions
*/
struct nft_set_elem {
@@ -302,6 +305,7 @@ struct nft_set_elem {
u32 buf[NFT_DATA_VALUE_MAXLEN / sizeof(u32)];
struct nft_data val;
} data;
+ u32 flags;
struct nft_elem_priv *priv;
};

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 34eb3aecb3f2..62166da04554 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -87,6 +87,12 @@ struct netns_ipv4 {
int sysctl_tcp_rmem[3];
__cacheline_group_end(netns_ipv4_read_rx);

+ /* ICMP rate limiter hot cache line. */
+ __cacheline_group_begin_aligned(icmp);
+ atomic_t icmp_global_credit;
+ u32 icmp_global_stamp;
+ __cacheline_group_end_aligned(icmp);
+
struct inet_timewait_death_row tcp_death_row;
struct udp_table *udp_table;

@@ -139,8 +145,7 @@ struct netns_ipv4 {
int sysctl_icmp_ratemask;
int sysctl_icmp_msgs_per_sec;
int sysctl_icmp_msgs_burst;
- atomic_t icmp_global_credit;
- u32 icmp_global_stamp;
+
u32 ip_rt_min_pmtu;
int ip_rt_mtu_expires;
int ip_rt_min_advmss;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index ab20f549b8f9..3c84d95cdba8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1190,7 +1190,15 @@ enum tcp_ca_ack_event_flags {
#define TCP_CONG_NON_RESTRICTED BIT(0)
/* Requires ECN/ECT set on all packets */
#define TCP_CONG_NEEDS_ECN BIT(1)
-#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN)
+/* Require successfully negotiated AccECN capability */
+#define TCP_CONG_NEEDS_ACCECN BIT(2)
+/* Use ECT(1) instead of ECT(0) while the CA is uninitialized */
+#define TCP_CONG_ECT_1_NEGOTIATION BIT(3)
+/* Cannot fallback to RFC3168 during AccECN negotiation */
+#define TCP_CONG_NO_FALLBACK_RFC3168 BIT(4)
+#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN | \
+ TCP_CONG_NEEDS_ACCECN | TCP_CONG_ECT_1_NEGOTIATION | \
+ TCP_CONG_NO_FALLBACK_RFC3168)

union tcp_cc_info;

@@ -1322,6 +1330,27 @@ static inline bool tcp_ca_needs_ecn(const struct sock *sk)
return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ECN;
}

+static inline bool tcp_ca_needs_accecn(const struct sock *sk)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+
+ return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ACCECN;
+}
+
+static inline bool tcp_ca_ect_1_negotiation(const struct sock *sk)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+
+ return icsk->icsk_ca_ops->flags & TCP_CONG_ECT_1_NEGOTIATION;
+}
+
+static inline bool tcp_ca_no_fallback_rfc3168(const struct sock *sk)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+
+ return icsk->icsk_ca_ops->flags & TCP_CONG_NO_FALLBACK_RFC3168;
+}
+
static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h
index f13e5cd2b1ac..a709fb1756eb 100644
--- a/include/net/tcp_ecn.h
+++ b/include/net/tcp_ecn.h
@@ -31,6 +31,12 @@ enum tcp_accecn_option {
TCP_ACCECN_OPTION_FULL = 2,
};

+/* Apply either ECT(0) or ECT(1) based on TCP_CONG_ECT_1_NEGOTIATION flag */
+static inline void INET_ECN_xmit_ect_1_negotiation(struct sock *sk)
+{
+ __INET_ECN_xmit(sk, tcp_ca_ect_1_negotiation(sk));
+}
+
static inline void tcp_ecn_queue_cwr(struct tcp_sock *tp)
{
/* Do not set CWR if in AccECN mode! */
@@ -467,6 +473,26 @@ static inline u8 tcp_accecn_option_init(const struct sk_buff *skb,
return TCP_ACCECN_OPT_COUNTER_SEEN;
}

+static inline void tcp_ecn_rcv_synack_accecn(struct sock *sk,
+ const struct sk_buff *skb, u8 dsf)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
+ tp->syn_ect_rcv = dsf & INET_ECN_MASK;
+ /* Demand Accurate ECN option in response to the SYN on the SYN/ACK
+ * and the TCP server will try to send one more packet with an AccECN
+ * Option at a later point during the connection.
+ */
+ if (tp->rx_opt.accecn &&
+ tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) {
+ u8 saw_opt = tcp_accecn_option_init(skb, tp->rx_opt.accecn);
+
+ tcp_accecn_saw_opt_fail_recv(tp, saw_opt);
+ tp->accecn_opt_demand = 2;
+ }
+}
+
/* See Table 2 of the AccECN draft */
static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb,
const struct tcphdr *th, u8 ip_dsfield)
@@ -489,32 +515,32 @@ static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb
tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
break;
case 0x1:
- case 0x5:
/* +========+========+============+=============+
* | A | B | SYN/ACK | Feedback |
* | | | B->A | Mode of A |
* | | | AE CWR ECE | |
* +========+========+============+=============+
- * | AccECN | Nonce | 1 0 1 | (Reserved) |
* | AccECN | ECN | 0 0 1 | Classic ECN |
* | Nonce | AccECN | 0 0 1 | Classic ECN |
* | ECN | AccECN | 0 0 1 | Classic ECN |
* +========+========+============+=============+
*/
- if (tcp_ecn_mode_pending(tp))
- /* Downgrade from AccECN, or requested initially */
+ if (tcp_ca_no_fallback_rfc3168(sk))
+ tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
+ else
tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
break;
- default:
- tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
- tp->syn_ect_rcv = ip_dsfield & INET_ECN_MASK;
- if (tp->rx_opt.accecn &&
- tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) {
- u8 saw_opt = tcp_accecn_option_init(skb, tp->rx_opt.accecn);
-
- tcp_accecn_saw_opt_fail_recv(tp, saw_opt);
- tp->accecn_opt_demand = 2;
+ case 0x5:
+ if (tcp_ecn_mode_pending(tp)) {
+ tcp_ecn_rcv_synack_accecn(sk, skb, ip_dsfield);
+ if (INET_ECN_is_ce(ip_dsfield)) {
+ tp->received_ce++;
+ tp->received_ce_pending++;
+ }
}
+ break;
+ default:
+ tcp_ecn_rcv_synack_accecn(sk, skb, ip_dsfield);
if (INET_ECN_is_ce(ip_dsfield) &&
tcp_accecn_validate_syn_feedback(sk, ace,
tp->syn_ect_snt)) {
@@ -525,9 +551,11 @@ static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb
}
}

-static inline void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th,
+static inline void tcp_ecn_rcv_syn(struct sock *sk, const struct tcphdr *th,
const struct sk_buff *skb)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
if (tcp_ecn_mode_pending(tp)) {
if (!tcp_accecn_syn_requested(th)) {
/* Downgrade to classic ECN feedback */
@@ -539,7 +567,8 @@ static inline void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th,
tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
}
}
- if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || !th->cwr))
+ if (tcp_ecn_mode_rfc3168(tp) &&
+ (!th->ece || !th->cwr || tcp_ca_no_fallback_rfc3168(sk)))
tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
}

@@ -561,7 +590,7 @@ static inline void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
else if (tcp_ca_needs_ecn(sk) ||
tcp_bpf_ca_needs_ecn(sk))
- INET_ECN_xmit(sk);
+ INET_ECN_xmit_ect_1_negotiation(sk);

if (tp->ecn_flags & TCP_ECN_MODE_ACCECN) {
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ACE;
@@ -579,7 +608,8 @@ static inline void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
bool use_ecn, use_accecn;
u8 tcp_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn);

- use_accecn = tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ACCECN;
+ use_accecn = tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ACCECN ||
+ tcp_ca_needs_accecn(sk);
use_ecn = tcp_ecn == TCP_ECN_IN_ECN_OUT_ECN ||
tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ECN ||
tcp_ca_needs_ecn(sk) || bpf_needs_ecn || use_accecn;
@@ -595,7 +625,7 @@ static inline void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)

if (use_ecn) {
if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
- INET_ECN_xmit(sk);
+ INET_ECN_xmit_ect_1_negotiation(sk);

TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
if (use_accecn) {
diff --git a/include/rdma/rw.h b/include/rdma/rw.h
index d606cac48233..9a8f4b76ce58 100644
--- a/include/rdma/rw.h
+++ b/include/rdma/rw.h
@@ -66,6 +66,8 @@ int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num,

unsigned int rdma_rw_mr_factor(struct ib_device *device, u32 port_num,
unsigned int maxpages);
+unsigned int rdma_rw_max_send_wr(struct ib_device *dev, u32 port_num,
+ unsigned int max_rdma_ctxs, u32 create_flags);
void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr);
int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr);
void rdma_rw_cleanup_mrs(struct ib_qp *qp);
diff --git a/include/sound/sdca_function.h b/include/sound/sdca_function.h
index ea68856e4c8c..86fd74146c33 100644
--- a/include/sound/sdca_function.h
+++ b/include/sound/sdca_function.h
@@ -771,6 +771,7 @@ struct sdca_control {
u8 layers;

bool deferrable;
+ bool is_volatile;
bool has_default;
bool has_fixed;
};
diff --git a/include/uapi/linux/dpll.h b/include/uapi/linux/dpll.h
index ab1725a954d7..69d35570ac4f 100644
--- a/include/uapi/linux/dpll.h
+++ b/include/uapi/linux/dpll.h
@@ -251,6 +251,7 @@ enum dpll_a_pin {
DPLL_A_PIN_ESYNC_FREQUENCY_SUPPORTED,
DPLL_A_PIN_ESYNC_PULSE,
DPLL_A_PIN_REFERENCE_SYNC,
+ DPLL_A_PIN_PHASE_ADJUST_GRAN,

__DPLL_A_PIN_MAX,
DPLL_A_PIN_MAX = (__DPLL_A_PIN_MAX - 1)
diff --git a/include/uapi/linux/nfs.h b/include/uapi/linux/nfs.h
index 71c7196d3281..e629c4953534 100644
--- a/include/uapi/linux/nfs.h
+++ b/include/uapi/linux/nfs.h
@@ -55,7 +55,7 @@
NFSERR_NODEV = 19, /* v2 v3 v4 */
NFSERR_NOTDIR = 20, /* v2 v3 v4 */
NFSERR_ISDIR = 21, /* v2 v3 v4 */
- NFSERR_INVAL = 22, /* v2 v3 v4 */
+ NFSERR_INVAL = 22, /* v3 v4 */
NFSERR_FBIG = 27, /* v2 v3 v4 */
NFSERR_NOSPC = 28, /* v2 v3 v4 */
NFSERR_ROFS = 30, /* v2 v3 v4 */
diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index d949db3a4675..17fe07dac6a7 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -1350,17 +1350,13 @@ static inline void *ufshcd_get_variant(struct ufs_hba *hba)
return hba->priv;
}

-#ifdef CONFIG_PM
extern int ufshcd_runtime_suspend(struct device *dev);
extern int ufshcd_runtime_resume(struct device *dev);
-#endif
-#ifdef CONFIG_PM_SLEEP
extern int ufshcd_system_suspend(struct device *dev);
extern int ufshcd_system_resume(struct device *dev);
extern int ufshcd_system_freeze(struct device *dev);
extern int ufshcd_system_thaw(struct device *dev);
extern int ufshcd_system_restore(struct device *dev);
-#endif

extern int ufshcd_dme_reset(struct ufs_hba *hba);
extern int ufshcd_dme_enable(struct ufs_hba *hba);
diff --git a/include/xen/xen.h b/include/xen/xen.h
index 61854e3f2837..f280c5dcf923 100644
--- a/include/xen/xen.h
+++ b/include/xen/xen.h
@@ -69,11 +69,13 @@ extern u64 xen_saved_max_mem_size;
#endif

#ifdef CONFIG_XEN_UNPOPULATED_ALLOC
+extern unsigned long xen_unpopulated_pages;
int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages);
void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages);
#include <linux/ioport.h>
int arch_xen_unpopulated_init(struct resource **res);
#else
+#define xen_unpopulated_pages 0UL
#include <xen/balloon.h>
static inline int xen_alloc_unpopulated_pages(unsigned int nr_pages,
struct page **pages)
diff --git a/io_uring/cancel.h b/io_uring/cancel.h
index 43e9bb74e9d1..eaa4069e258c 100644
--- a/io_uring/cancel.h
+++ b/io_uring/cancel.h
@@ -6,10 +6,8 @@

struct io_cancel_data {
struct io_ring_ctx *ctx;
- union {
- u64 data;
- struct file *file;
- };
+ u64 data;
+ struct file *file;
u8 opcode;
u32 flags;
int seq;
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 104192bcc8e4..65af47b9135b 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -148,7 +148,7 @@ static bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
static void io_queue_sqe(struct io_kiocb *req, unsigned int extra_flags);
static void __io_req_caches_free(struct io_ring_ctx *ctx);

-static __read_mostly DEFINE_STATIC_KEY_FALSE(io_key_has_sqarray);
+static __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(io_key_has_sqarray, HZ);

struct kmem_cache *req_cachep;
static struct workqueue_struct *iou_wq __ro_after_init;
@@ -2390,7 +2390,7 @@ static bool io_get_sqe(struct io_ring_ctx *ctx, const struct io_uring_sqe **sqe)
unsigned mask = ctx->sq_entries - 1;
unsigned head = ctx->cached_sq_head++ & mask;

- if (static_branch_unlikely(&io_key_has_sqarray) &&
+ if (static_branch_unlikely(&io_key_has_sqarray.key) &&
(!(ctx->flags & IORING_SETUP_NO_SQARRAY))) {
head = READ_ONCE(ctx->sq_array[head]);
if (unlikely(head >= ctx->sq_entries)) {
@@ -2869,7 +2869,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
io_rings_free(ctx);

if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
- static_branch_dec(&io_key_has_sqarray);
+ static_branch_slow_dec_deferred(&io_key_has_sqarray);

percpu_ref_exit(&ctx->refs);
free_uid(ctx->user);
@@ -3490,7 +3490,11 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,

ctx = file->private_data;
ret = -EBADFD;
- if (unlikely(ctx->flags & IORING_SETUP_R_DISABLED))
+ /*
+ * Keep IORING_SETUP_R_DISABLED check before submitter_task load
+ * in io_uring_add_tctx_node() -> __io_uring_add_tctx_node_from_submit()
+ */
+ if (unlikely(smp_load_acquire(&ctx->flags) & IORING_SETUP_R_DISABLED))
goto out;

/*
@@ -3813,7 +3817,7 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
ctx->clock_offset = 0;

if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
- static_branch_inc(&io_key_has_sqarray);
+ static_branch_deferred_inc(&io_key_has_sqarray);

if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
!(ctx->flags & IORING_SETUP_IOPOLL) &&
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index d974381d93ff..308ef71bcb28 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -669,8 +669,9 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
bl->buf_ring = br;
if (reg.flags & IOU_PBUF_RING_INC)
bl->flags |= IOBL_INC;
- io_buffer_add_list(ctx, bl, reg.bgid);
- return 0;
+ ret = io_buffer_add_list(ctx, bl, reg.bgid);
+ if (!ret)
+ return 0;
fail:
io_free_region(ctx, &bl->region);
kfree(bl);
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index 5e5b94236d72..bce74a8b64c6 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -124,7 +124,11 @@ static int __io_msg_ring_data(struct io_ring_ctx *target_ctx,
return -EINVAL;
if (!(msg->flags & IORING_MSG_RING_FLAGS_PASS) && msg->dst_fd)
return -EINVAL;
- if (target_ctx->flags & IORING_SETUP_R_DISABLED)
+ /*
+ * Keep IORING_SETUP_R_DISABLED check before submitter_task load
+ * in io_msg_data_remote() -> io_msg_remote_post()
+ */
+ if (smp_load_acquire(&target_ctx->flags) & IORING_SETUP_R_DISABLED)
return -EBADFD;

if (io_msg_need_remote(target_ctx))
@@ -244,7 +248,11 @@ static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
return -EINVAL;
if (target_ctx == ctx)
return -EINVAL;
- if (target_ctx->flags & IORING_SETUP_R_DISABLED)
+ /*
+ * Keep IORING_SETUP_R_DISABLED check before submitter_task load
+ * in io_msg_fd_remote()
+ */
+ if (smp_load_acquire(&target_ctx->flags) & IORING_SETUP_R_DISABLED)
return -EBADFD;
if (!msg->src_file) {
int ret = io_msg_grab_file(req, issue_flags);
diff --git a/io_uring/register.c b/io_uring/register.c
index d189b266b8cc..db53e664348d 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -193,7 +193,8 @@ static int io_register_enable_rings(struct io_ring_ctx *ctx)
if (ctx->restrictions.registered)
ctx->restricted = 1;

- ctx->flags &= ~IORING_SETUP_R_DISABLED;
+ /* Keep submitter_task store before clearing IORING_SETUP_R_DISABLED */
+ smp_store_release(&ctx->flags, ctx->flags & ~IORING_SETUP_R_DISABLED);
if (ctx->sq_data && wq_has_sleeper(&ctx->sq_data->wait))
wake_up(&ctx->sq_data->wait);
return 0;
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 160b4de2d00d..44442bf4827e 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -1185,12 +1185,16 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
return -EBUSY;

nbufs = src_ctx->buf_table.nr;
+ if (!nbufs)
+ return -ENXIO;
if (!arg->nr)
arg->nr = nbufs;
else if (arg->nr > nbufs)
return -EINVAL;
else if (arg->nr > IORING_MAX_REG_BUFFERS)
return -EINVAL;
+ if (check_add_overflow(arg->nr, arg->src_off, &off) || off > nbufs)
+ return -EOVERFLOW;
if (check_add_overflow(arg->nr, arg->dst_off, &nbufs))
return -EOVERFLOW;
if (nbufs > IORING_MAX_REG_BUFFERS)
@@ -1210,21 +1214,6 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
}
}

- ret = -ENXIO;
- nbufs = src_ctx->buf_table.nr;
- if (!nbufs)
- goto out_free;
- ret = -EINVAL;
- if (!arg->nr)
- arg->nr = nbufs;
- else if (arg->nr > nbufs)
- goto out_free;
- ret = -EOVERFLOW;
- if (check_add_overflow(arg->nr, arg->src_off, &off))
- goto out_free;
- if (off > nbufs)
- goto out_free;
-
off = arg->dst_off;
i = arg->src_off;
nr = arg->nr;
@@ -1237,8 +1226,8 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
} else {
dst_node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
if (!dst_node) {
- ret = -ENOMEM;
- goto out_free;
+ io_rsrc_data_free(ctx, &data);
+ return -ENOMEM;
}

refcount_inc(&src_node->buf->refs);
@@ -1274,10 +1263,6 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
WARN_ON_ONCE(ctx->buf_table.nr);
ctx->buf_table = data;
return 0;
-
-out_free:
- io_rsrc_data_free(ctx, &data);
- return ret;
}

/*
diff --git a/io_uring/sync.c b/io_uring/sync.c
index cea2d381ffd2..ab7fa1cd7dd6 100644
--- a/io_uring/sync.c
+++ b/io_uring/sync.c
@@ -62,6 +62,8 @@ int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return -EINVAL;

sync->off = READ_ONCE(sqe->off);
+ if (sync->off < 0)
+ return -EINVAL;
sync->len = READ_ONCE(sqe->len);
req->flags |= REQ_F_FORCE_ASYNC;
return 0;
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index 15b17e86e198..9b087ebeb643 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -214,7 +214,7 @@ static int ipc_permissions(struct ctl_table_header *head, const struct ctl_table
if (((table->data == &ns->ids[IPC_SEM_IDS].next_id) ||
(table->data == &ns->ids[IPC_MSG_IDS].next_id) ||
(table->data == &ns->ids[IPC_SHM_IDS].next_id)) &&
- checkpoint_restore_ns_capable(ns->user_ns))
+ checkpoint_restore_ns_capable_noaudit(ns->user_ns))
mode = 0666;
else
#endif
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index d595fe512498..c2278f392e93 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -713,8 +713,8 @@ static struct bpf_ksym *bpf_ksym_find(unsigned long addr)
return n ? container_of(n, struct bpf_ksym, tnode) : NULL;
}

-int __bpf_address_lookup(unsigned long addr, unsigned long *size,
- unsigned long *off, char *sym)
+int bpf_address_lookup(unsigned long addr, unsigned long *size,
+ unsigned long *off, char *sym)
{
struct bpf_ksym *ksym;
int ret = 0;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 81ef159ef89b..68da6dcfb4bb 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1080,7 +1080,7 @@ const struct bpf_func_proto bpf_snprintf_proto = {
.func = bpf_snprintf,
.gpl_only = true,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_MEM_OR_NULL,
+ .arg1_type = ARG_PTR_TO_MEM_OR_NULL | MEM_WRITE,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
.arg3_type = ARG_PTR_TO_CONST_STR,
.arg4_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
diff --git a/kernel/bpf/rqspinlock.c b/kernel/bpf/rqspinlock.c
index 3faf9cbd6c75..c0c93a0f5af6 100644
--- a/kernel/bpf/rqspinlock.c
+++ b/kernel/bpf/rqspinlock.c
@@ -276,10 +276,11 @@ int __lockfunc resilient_tas_spin_lock(rqspinlock_t *lock)

RES_INIT_TIMEOUT(ts);
/*
- * The fast path is not invoked for the TAS fallback, so we must grab
- * the deadlock detection entry here.
+ * We are either called directly from res_spin_lock after grabbing the
+ * deadlock detection entry when queued spinlocks are disabled, or from
+ * resilient_queued_spin_lock_slowpath after grabbing the deadlock
+ * detection entry. No need to obtain it here.
*/
- grab_held_lock_entry(lock);

/*
* Since the waiting loop's time is dependent on the amount of
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index df219e725909..586ece78f783 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1365,11 +1365,6 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
return ret;
}

-static bool bpf_net_capable(void)
-{
- return capable(CAP_NET_ADMIN) || capable(CAP_SYS_ADMIN);
-}
-
#define BPF_MAP_CREATE_LAST_FIELD excl_prog_hash_size
/* called via syscall */
static int map_create(union bpf_attr *attr, bpfptr_t uattr)
@@ -2830,6 +2825,13 @@ static int bpf_prog_verify_signature(struct bpf_prog *prog, union bpf_attr *attr
void *sig;
int err = 0;

+ /*
+ * Don't attempt to use kmalloc_large or vmalloc for signatures.
+ * Practical signature for BPF program should be below this limit.
+ */
+ if (attr->signature_size > KMALLOC_MAX_CACHE_SIZE)
+ return -EINVAL;
+
if (system_keyring_id_check(attr->keyring_id) == 0)
key = bpf_lookup_system_key(attr->keyring_id);
else
@@ -4554,6 +4556,8 @@ static int bpf_prog_detach(const union bpf_attr *attr)
prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
if (IS_ERR(prog))
return PTR_ERR(prog);
+ } else if (!bpf_mprog_detach_empty(ptype)) {
+ return -EPERM;
}
} else if (is_cgroup_prog_type(ptype, 0, false)) {
if (attr->attach_flags || attr->relative_fd)
@@ -5299,6 +5303,9 @@ static int bpf_map_get_info_by_fd(struct file *file,
if (info.hash_size != SHA256_DIGEST_SIZE)
return -EINVAL;

+ if (!READ_ONCE(map->frozen))
+ return -EPERM;
+
err = map->ops->map_get_hash(map, SHA256_DIGEST_SIZE, map->sha);
if (err != 0)
return err;
@@ -6396,7 +6403,7 @@ static const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = {
.func = bpf_kallsyms_lookup_name,
.gpl_only = false,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_MEM,
+ .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 89560e455ce7..c4fa2268dbbc 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -16715,6 +16715,7 @@ static void sync_linked_regs(struct bpf_verifier_state *vstate, struct bpf_reg_s
} else {
s32 saved_subreg_def = reg->subreg_def;
s32 saved_off = reg->off;
+ u32 saved_id = reg->id;

fake_reg.type = SCALAR_VALUE;
__mark_reg_known(&fake_reg, (s32)reg->off - (s32)known_reg->off);
@@ -16722,10 +16723,11 @@ static void sync_linked_regs(struct bpf_verifier_state *vstate, struct bpf_reg_s
/* reg = known_reg; reg += delta */
copy_register_state(reg, known_reg);
/*
- * Must preserve off, id and add_const flag,
+ * Must preserve off, id and subreg_def flag,
* otherwise another sync_linked_regs() will be incorrect.
*/
reg->off = saved_off;
+ reg->id = saved_id;
reg->subreg_def = saved_subreg_def;

scalar32_min_max_add(reg, &fake_reg);
@@ -20147,17 +20149,19 @@ static int do_check(struct bpf_verifier_env *env)
* may skip a nospec patched-in after the jump. This can
* currently never happen because nospec_result is only
* used for the write-ops
- * `*(size*)(dst_reg+off)=src_reg|imm32` which must
- * never skip the following insn. Still, add a warning
- * to document this in case nospec_result is used
- * elsewhere in the future.
+ * `*(size*)(dst_reg+off)=src_reg|imm32` and helper
+ * calls. These must never skip the following insn
+ * (i.e., bpf_insn_successors()'s opcode_info.can_jump
+ * is false). Still, add a warning to document this in
+ * case nospec_result is used elsewhere in the future.
*
* All non-branch instructions have a single
* fall-through edge. For these, nospec_result should
* already work.
*/
- if (verifier_bug_if(BPF_CLASS(insn->code) == BPF_JMP ||
- BPF_CLASS(insn->code) == BPF_JMP32, env,
+ if (verifier_bug_if((BPF_CLASS(insn->code) == BPF_JMP ||
+ BPF_CLASS(insn->code) == BPF_JMP32) &&
+ BPF_OP(insn->code) != BPF_CALL, env,
"speculation barrier after jump instruction may not have the desired effect"))
return -EFAULT;
process_bpf_exit:
@@ -20215,29 +20219,29 @@ static int find_btf_percpu_datasec(struct btf *btf)
}

/*
- * Add btf to the used_btfs array and return the index. (If the btf was
- * already added, then just return the index.) Upon successful insertion
- * increase btf refcnt, and, if present, also refcount the corresponding
- * kernel module.
+ * Add btf to the env->used_btfs array. If needed, refcount the
+ * corresponding kernel module. To simplify caller's logic
+ * in case of error or if btf was added before the function
+ * decreases the btf refcount.
*/
static int __add_used_btf(struct bpf_verifier_env *env, struct btf *btf)
{
struct btf_mod_pair *btf_mod;
+ int ret = 0;
int i;

/* check whether we recorded this BTF (and maybe module) already */
for (i = 0; i < env->used_btf_cnt; i++)
if (env->used_btfs[i].btf == btf)
- return i;
+ goto ret_put;

if (env->used_btf_cnt >= MAX_USED_BTFS) {
verbose(env, "The total number of btfs per program has reached the limit of %u\n",
MAX_USED_BTFS);
- return -E2BIG;
+ ret = -E2BIG;
+ goto ret_put;
}

- btf_get(btf);
-
btf_mod = &env->used_btfs[env->used_btf_cnt];
btf_mod->btf = btf;
btf_mod->module = NULL;
@@ -20246,12 +20250,18 @@ static int __add_used_btf(struct bpf_verifier_env *env, struct btf *btf)
if (btf_is_module(btf)) {
btf_mod->module = btf_try_get_module(btf);
if (!btf_mod->module) {
- btf_put(btf);
- return -ENXIO;
+ ret = -ENXIO;
+ goto ret_put;
}
}

- return env->used_btf_cnt++;
+ env->used_btf_cnt++;
+ return 0;
+
+ret_put:
+ /* Either error or this BTF was already added */
+ btf_put(btf);
+ return ret;
}

/* replace pseudo btf_id with kernel symbol address */
@@ -20348,9 +20358,7 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env,

btf_fd = insn[1].imm;
if (btf_fd) {
- CLASS(fd, f)(btf_fd);
-
- btf = __btf_get_by_fd(f);
+ btf = btf_get_by_fd(btf_fd);
if (IS_ERR(btf)) {
verbose(env, "invalid module BTF object FD specified.\n");
return -EINVAL;
@@ -20360,17 +20368,17 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env,
verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
return -EINVAL;
}
+ btf_get(btf_vmlinux);
btf = btf_vmlinux;
}

err = __check_pseudo_btf_id(env, insn, aux, btf);
- if (err)
+ if (err) {
+ btf_put(btf);
return err;
+ }

- err = __add_used_btf(env, btf);
- if (err < 0)
- return err;
- return 0;
+ return __add_used_btf(env, btf);
}

static bool is_tracing_prog_type(enum bpf_prog_type type)
@@ -24092,10 +24100,8 @@ static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)

btf = __btf_get_by_fd(f);
if (!IS_ERR(btf)) {
- err = __add_used_btf(env, btf);
- if (err < 0)
- return err;
- return 0;
+ btf_get(btf);
+ return __add_used_btf(env, btf);
}

verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 049e296f586c..cdd6e025935d 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -345,7 +345,7 @@ int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize,
return 1;
}
return !!module_address_lookup(addr, symbolsize, offset, NULL, NULL, namebuf) ||
- !!__bpf_address_lookup(addr, symbolsize, offset, namebuf);
+ !!bpf_address_lookup(addr, symbolsize, offset, namebuf);
}

static int kallsyms_lookup_buildid(unsigned long addr,
@@ -377,12 +377,11 @@ static int kallsyms_lookup_buildid(unsigned long addr,
ret = module_address_lookup(addr, symbolsize, offset,
modname, modbuildid, namebuf);
if (!ret)
- ret = bpf_address_lookup(addr, symbolsize,
- offset, modname, namebuf);
+ ret = bpf_address_lookup(addr, symbolsize, offset, namebuf);

if (!ret)
- ret = ftrace_mod_address_lookup(addr, symbolsize,
- offset, modname, namebuf);
+ ret = ftrace_mod_address_lookup(addr, symbolsize, offset,
+ modname, modbuildid, namebuf);

return ret;
}
diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c
index 00a60796327c..0fc11e45df9b 100644
--- a/kernel/module/kallsyms.c
+++ b/kernel/module/kallsyms.c
@@ -334,13 +334,8 @@ int module_address_lookup(unsigned long addr,
if (mod) {
if (modname)
*modname = mod->name;
- if (modbuildid) {
-#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID)
- *modbuildid = mod->build_id;
-#else
- *modbuildid = NULL;
-#endif
- }
+ if (modbuildid)
+ *modbuildid = module_buildid(mod);

sym = find_kallsyms_symbol(mod, addr, size, offset);

diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index b8bbe7960cda..2265b9c2906e 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -203,7 +203,7 @@ struct rcu_data {
/* during and after the last grace */
/* period it is aware of. */
struct irq_work defer_qs_iw; /* Obtain later scheduler attention. */
- int defer_qs_iw_pending; /* Scheduler attention pending? */
+ int defer_qs_pending; /* irqwork or softirq pending? */
struct work_struct strict_work; /* Schedule readers for strict GPs. */

/* 2) batch handling */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index d85763336b3c..cafb1cc8eff8 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -487,8 +487,8 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
union rcu_special special;

rdp = this_cpu_ptr(&rcu_data);
- if (rdp->defer_qs_iw_pending == DEFER_QS_PENDING)
- rdp->defer_qs_iw_pending = DEFER_QS_IDLE;
+ if (rdp->defer_qs_pending == DEFER_QS_PENDING)
+ rdp->defer_qs_pending = DEFER_QS_IDLE;

/*
* If RCU core is waiting for this CPU to exit its critical section,
@@ -645,7 +645,7 @@ static void rcu_preempt_deferred_qs_handler(struct irq_work *iwp)
* 5. Deferred QS reporting does not happen.
*/
if (rcu_preempt_depth() > 0)
- WRITE_ONCE(rdp->defer_qs_iw_pending, DEFER_QS_IDLE);
+ WRITE_ONCE(rdp->defer_qs_pending, DEFER_QS_IDLE);
}

/*
@@ -747,7 +747,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
// Using softirq, safe to awaken, and either the
// wakeup is free or there is either an expedited
// GP in flight or a potential need to deboost.
- raise_softirq_irqoff(RCU_SOFTIRQ);
+ if (rdp->defer_qs_pending != DEFER_QS_PENDING) {
+ rdp->defer_qs_pending = DEFER_QS_PENDING;
+ raise_softirq_irqoff(RCU_SOFTIRQ);
+ }
} else {
// Enabling BH or preempt does reschedule, so...
// Also if no expediting and no possible deboosting,
@@ -756,11 +759,11 @@ static void rcu_read_unlock_special(struct task_struct *t)
set_tsk_need_resched(current);
set_preempt_need_resched();
if (IS_ENABLED(CONFIG_IRQ_WORK) && irqs_were_disabled &&
- needs_exp && rdp->defer_qs_iw_pending != DEFER_QS_PENDING &&
+ needs_exp && rdp->defer_qs_pending != DEFER_QS_PENDING &&
cpu_online(rdp->cpu)) {
// Get scheduler to re-evaluate and call hooks.
// If !IRQ_WORK, FQS scan will eventually IPI.
- rdp->defer_qs_iw_pending = DEFER_QS_PENDING;
+ rdp->defer_qs_pending = DEFER_QS_PENDING;
irq_work_queue_on(&rdp->defer_qs_iw, rdp->cpu);
}
}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e460c22de8ad..582c3847f483 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -119,6 +119,9 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_cfs_tp);
EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_se_tp);
EXPORT_TRACEPOINT_SYMBOL_GPL(sched_update_nr_running_tp);
EXPORT_TRACEPOINT_SYMBOL_GPL(sched_compute_energy_tp);
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_entry_tp);
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_exit_tp);
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_set_need_resched_tp);

DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
DEFINE_PER_CPU(struct rnd_state, sched_rnd_state);
@@ -1136,6 +1139,7 @@ void __trace_set_need_resched(struct task_struct *curr, int tif)
{
trace_sched_set_need_resched_tp(curr, smp_processor_id(), tif);
}
+EXPORT_SYMBOL_GPL(__trace_set_need_resched);

void resched_curr(struct rq *rq)
{
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index c7a8717e837d..72499cf2a1db 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -3591,6 +3591,9 @@ static void __dl_clear_params(struct sched_dl_entity *dl_se)
dl_se->dl_non_contending = 0;
dl_se->dl_overrun = 0;
dl_se->dl_server = 0;
+ dl_se->dl_defer = 0;
+ dl_se->dl_defer_running = 0;
+ dl_se->dl_defer_armed = 0;

#ifdef CONFIG_RT_MUTEXES
dl_se->pi_se = dl_se;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index fb07dcfc60a2..d4d994fb8999 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2100,6 +2100,7 @@ static void push_rt_tasks(struct rq *rq)
*/
static int rto_next_cpu(struct root_domain *rd)
{
+ int this_cpu = smp_processor_id();
int next;
int cpu;

@@ -2123,6 +2124,10 @@ static int rto_next_cpu(struct root_domain *rd)

rd->rto_cpu = cpu;

+ /* Do not send IPI to self */
+ if (cpu == this_cpu)
+ continue;
+
if (cpu < nr_cpu_ids)
return cpu;

diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index e618addb5864..21b6d9340148 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1742,7 +1742,7 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,

lockdep_assert_held(&cpu_base->lock);

- debug_deactivate(timer);
+ debug_hrtimer_deactivate(timer);
base->running = timer;

/*
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index cc1afec306b3..425d429906d0 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -215,7 +215,7 @@ void sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)

update_clock_read_data(&rd);

- if (sched_clock_timer.function != NULL) {
+ if (ACCESS_PRIVATE(&sched_clock_timer, function) != NULL) {
/* update timeout for clock wrap */
hrtimer_start(&sched_clock_timer, cd.wrap_kt,
HRTIMER_MODE_REL_HARD);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 49e0bdaa7a1b..e7f1fe44352a 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1022,7 +1022,7 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = {
.func = bpf_snprintf_btf,
.gpl_only = false,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_MEM,
+ .arg1_type = ARG_PTR_TO_MEM | MEM_WRITE,
.arg2_type = ARG_CONST_SIZE,
.arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg4_type = ARG_CONST_SIZE,
@@ -1526,7 +1526,7 @@ static const struct bpf_func_proto bpf_read_branch_records_proto = {
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
- .arg2_type = ARG_PTR_TO_MEM_OR_NULL,
+ .arg2_type = ARG_PTR_TO_MEM_OR_NULL | MEM_WRITE,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
};
@@ -1661,7 +1661,7 @@ static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
- .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg2_type = ARG_PTR_TO_UNINIT_MEM,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
};
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index e95408a47c1d..905f4d167955 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -7709,7 +7709,8 @@ ftrace_func_address_lookup(struct ftrace_mod_map *mod_map,

int
ftrace_mod_address_lookup(unsigned long addr, unsigned long *size,
- unsigned long *off, char **modname, char *sym)
+ unsigned long *off, char **modname,
+ const unsigned char **modbuildid, char *sym)
{
struct ftrace_mod_map *mod_map;
int ret = 0;
@@ -7721,6 +7722,8 @@ ftrace_mod_address_lookup(unsigned long addr, unsigned long *size,
if (ret) {
if (modname)
*modname = mod_map->mod->name;
+ if (modbuildid)
+ *modbuildid = module_buildid(mod_map->mod);
break;
}
}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 099f08132902..5cf55a9c6fad 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -3964,11 +3964,6 @@ void trace_put_event_file(struct trace_event_file *file)
EXPORT_SYMBOL_GPL(trace_put_event_file);

#ifdef CONFIG_DYNAMIC_FTRACE
-
-/* Avoid typos */
-#define ENABLE_EVENT_STR "enable_event"
-#define DISABLE_EVENT_STR "disable_event"
-
struct event_probe_data {
struct trace_event_file *file;
unsigned long count;
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 425ae26064ba..45727c4cf954 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -6909,7 +6909,7 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops,

remove_hist_vars(hist_data);

- kfree(trigger_data);
+ trigger_data_free(trigger_data);

destroy_hist_data(hist_data);
goto out;
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 586af49fc03e..fc4a8f2d3096 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -47,7 +47,7 @@ static int set_permissions(struct ctl_table_header *head,
int mode;

/* Allow users with CAP_SYS_RESOURCE unrestrained access */
- if (ns_capable(user_ns, CAP_SYS_RESOURCE))
+ if (ns_capable_noaudit(user_ns, CAP_SYS_RESOURCE))
mode = (table->mode & S_IRWXU) >> 6;
else
/* Allow all others at most read-only access */
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 45320e27a16c..885a8b31f855 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -117,6 +117,8 @@ enum wq_internal_consts {
MAYDAY_INTERVAL = HZ / 10, /* and then every 100ms */
CREATE_COOLDOWN = HZ, /* time to breath after fail */

+ RESCUER_BATCH = 16, /* process items per turn */
+
/*
* Rescue workers are used only on emergencies and shared by
* all cpus. Give MIN_NICE.
@@ -286,6 +288,7 @@ struct pool_workqueue {
struct list_head pending_node; /* LN: node on wq_node_nr_active->pending_pwqs */
struct list_head pwqs_node; /* WR: node on wq->pwqs */
struct list_head mayday_node; /* MD: node on wq->maydays */
+ struct work_struct mayday_cursor; /* L: cursor on pool->worklist */

u64 stats[PWQ_NR_STATS];

@@ -1126,6 +1129,12 @@ static struct worker *find_worker_executing_work(struct worker_pool *pool,
return NULL;
}

+static void mayday_cursor_func(struct work_struct *work)
+{
+ /* should not be processed, only for marking position */
+ BUG();
+}
+
/**
* move_linked_works - move linked works to a list
* @work: start of series of works to be scheduled
@@ -1188,6 +1197,16 @@ static bool assign_work(struct work_struct *work, struct worker *worker,

lockdep_assert_held(&pool->lock);

+ /* The cursor work should not be processed */
+ if (unlikely(work->func == mayday_cursor_func)) {
+ /* only worker_thread() can possibly take this branch */
+ WARN_ON_ONCE(worker->rescue_wq);
+ if (nextp)
+ *nextp = list_next_entry(work, entry);
+ list_del_init(&work->entry);
+ return false;
+ }
+
/*
* A single work shouldn't be executed concurrently by multiple workers.
* __queue_work() ensures that @work doesn't jump to a different pool
@@ -3443,6 +3462,35 @@ static int worker_thread(void *__worker)
goto woke_up;
}

+static bool assign_rescuer_work(struct pool_workqueue *pwq, struct worker *rescuer)
+{
+ struct worker_pool *pool = pwq->pool;
+ struct work_struct *cursor = &pwq->mayday_cursor;
+ struct work_struct *work, *n;
+
+ /* need rescue? */
+ if (!pwq->nr_active || !need_to_create_worker(pool))
+ return false;
+
+ /* search from the start or cursor if available */
+ if (list_empty(&cursor->entry))
+ work = list_first_entry(&pool->worklist, struct work_struct, entry);
+ else
+ work = list_next_entry(cursor, entry);
+
+ /* find the next work item to rescue */
+ list_for_each_entry_safe_from(work, n, &pool->worklist, entry) {
+ if (get_work_pwq(work) == pwq && assign_work(work, rescuer, &n)) {
+ pwq->stats[PWQ_STAT_RESCUED]++;
+ /* put the cursor for next search */
+ list_move_tail(&cursor->entry, &n->entry);
+ return true;
+ }
+ }
+
+ return false;
+}
+
/**
* rescuer_thread - the rescuer thread function
* @__rescuer: self
@@ -3497,7 +3545,7 @@ static int rescuer_thread(void *__rescuer)
struct pool_workqueue *pwq = list_first_entry(&wq->maydays,
struct pool_workqueue, mayday_node);
struct worker_pool *pool = pwq->pool;
- struct work_struct *work, *n;
+ unsigned int count = 0;

__set_current_state(TASK_RUNNING);
list_del_init(&pwq->mayday_node);
@@ -3508,30 +3556,18 @@ static int rescuer_thread(void *__rescuer)

raw_spin_lock_irq(&pool->lock);

- /*
- * Slurp in all works issued via this workqueue and
- * process'em.
- */
WARN_ON_ONCE(!list_empty(&rescuer->scheduled));
- list_for_each_entry_safe(work, n, &pool->worklist, entry) {
- if (get_work_pwq(work) == pwq &&
- assign_work(work, rescuer, &n))
- pwq->stats[PWQ_STAT_RESCUED]++;
- }

- if (!list_empty(&rescuer->scheduled)) {
+ while (assign_rescuer_work(pwq, rescuer)) {
process_scheduled_works(rescuer);

/*
- * The above execution of rescued work items could
- * have created more to rescue through
- * pwq_activate_first_inactive() or chained
- * queueing. Let's put @pwq back on mayday list so
- * that such back-to-back work items, which may be
- * being used to relieve memory pressure, don't
- * incur MAYDAY_INTERVAL delay inbetween.
+ * If the per-turn work item limit is reached and other
+ * PWQs are in mayday, requeue mayday for this PWQ and
+ * let the rescuer handle the other PWQs first.
*/
- if (pwq->nr_active && need_to_create_worker(pool)) {
+ if (++count > RESCUER_BATCH && !list_empty(&pwq->wq->maydays) &&
+ pwq->nr_active && need_to_create_worker(pool)) {
raw_spin_lock(&wq_mayday_lock);
/*
* Queue iff we aren't racing destruction
@@ -3542,9 +3578,14 @@ static int rescuer_thread(void *__rescuer)
list_add_tail(&pwq->mayday_node, &wq->maydays);
}
raw_spin_unlock(&wq_mayday_lock);
+ break;
}
}

+ /* The cursor can not be left behind without the rescuer watching it. */
+ if (!list_empty(&pwq->mayday_cursor.entry) && list_empty(&pwq->mayday_node))
+ list_del_init(&pwq->mayday_cursor.entry);
+
/*
* Leave this pool. Notify regular workers; otherwise, we end up
* with 0 concurrency and stalling the execution.
@@ -5163,6 +5204,19 @@ static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
INIT_LIST_HEAD(&pwq->pwqs_node);
INIT_LIST_HEAD(&pwq->mayday_node);
kthread_init_work(&pwq->release_work, pwq_release_workfn);
+
+ /*
+ * Set the dummy cursor work with valid function and get_work_pwq().
+ *
+ * The cursor work should only be in the pwq->pool->worklist, and
+ * should not be treated as a processable work item.
+ *
+ * WORK_STRUCT_PENDING and WORK_STRUCT_INACTIVE just make it less
+ * surprise for kernel debugging tools and reviewers.
+ */
+ INIT_WORK(&pwq->mayday_cursor, mayday_cursor_func);
+ atomic_long_set(&pwq->mayday_cursor.data, (unsigned long)pwq |
+ WORK_STRUCT_PENDING | WORK_STRUCT_PWQ | WORK_STRUCT_INACTIVE);
}

/* sync @pwq with the current state of its associated wq and link it */
diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index bdde40cd69d7..97be2a39f537 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -340,8 +340,8 @@ EXPORT_SYMBOL(kstrtos8);
* @s: input string
* @res: result
*
- * This routine returns 0 iff the first character is one of 'YyTt1NnFf0', or
- * [oO][NnFf] for "on" and "off". Otherwise it will return -EINVAL. Value
+ * This routine returns 0 iff the first character is one of 'EeYyTt1DdNnFf0',
+ * or [oO][NnFf] for "on" and "off". Otherwise it will return -EINVAL. Value
* pointed to by res is updated upon finding a match.
*/
noinline
diff --git a/lib/objpool.c b/lib/objpool.c
index b998b720c732..d98fadf1de16 100644
--- a/lib/objpool.c
+++ b/lib/objpool.c
@@ -142,7 +142,7 @@ int objpool_init(struct objpool_head *pool, int nr_objs, int object_size,
pool->gfp = gfp & ~__GFP_ZERO;
pool->context = context;
pool->release = release;
- slot_size = nr_cpu_ids * sizeof(struct objpool_slot);
+ slot_size = nr_cpu_ids * sizeof(struct objpool_slot *);
pool->cpu_slots = kzalloc(slot_size, pool->gfp);
if (!pool->cpu_slots)
return -ENOMEM;
diff --git a/mm/slub.c b/mm/slub.c
index e01641cea143..896421a55557 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -6232,11 +6232,29 @@ static void rcu_free_sheaf(struct rcu_head *head)
free_empty_sheaf(s, sheaf);
}

+/*
+ * kvfree_call_rcu() can be called while holding a raw_spinlock_t. Since
+ * __kfree_rcu_sheaf() may acquire a spinlock_t (sleeping lock on PREEMPT_RT),
+ * this would violate lock nesting rules. Therefore, kvfree_call_rcu() avoids
+ * this problem by bypassing the sheaves layer entirely on PREEMPT_RT.
+ *
+ * However, lockdep still complains that it is invalid to acquire spinlock_t
+ * while holding raw_spinlock_t, even on !PREEMPT_RT where spinlock_t is a
+ * spinning lock. Tell lockdep that acquiring spinlock_t is valid here
+ * by temporarily raising the wait-type to LD_WAIT_CONFIG.
+ */
+static DEFINE_WAIT_OVERRIDE_MAP(kfree_rcu_sheaf_map, LD_WAIT_CONFIG);
+
bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj)
{
struct slub_percpu_sheaves *pcs;
struct slab_sheaf *rcu_sheaf;

+ if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT)))
+ return false;
+
+ lock_map_acquire_try(&kfree_rcu_sheaf_map);
+
if (!local_trylock(&s->cpu_sheaves->lock))
goto fail;

@@ -6313,10 +6331,12 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj)
local_unlock(&s->cpu_sheaves->lock);

stat(s, FREE_RCU_SHEAF);
+ lock_map_release(&kfree_rcu_sheaf_map);
return true;

fail:
stat(s, FREE_RCU_SHEAF_FAIL);
+ lock_map_release(&kfree_rcu_sheaf_map);
return false;
}

diff --git a/net/atm/signaling.c b/net/atm/signaling.c
index e70ae2c113f9..358fbe5e4d1d 100644
--- a/net/atm/signaling.c
+++ b/net/atm/signaling.c
@@ -22,6 +22,36 @@

struct atm_vcc *sigd = NULL;

+/*
+ * find_get_vcc - validate and get a reference to a vcc pointer
+ * @vcc: the vcc pointer to validate
+ *
+ * This function validates that @vcc points to a registered VCC in vcc_hash.
+ * If found, it increments the socket reference count and returns the vcc.
+ * The caller must call sock_put(sk_atm(vcc)) when done.
+ *
+ * Returns the vcc pointer if valid, NULL otherwise.
+ */
+static struct atm_vcc *find_get_vcc(struct atm_vcc *vcc)
+{
+ int i;
+
+ read_lock(&vcc_sklist_lock);
+ for (i = 0; i < VCC_HTABLE_SIZE; i++) {
+ struct sock *s;
+
+ sk_for_each(s, &vcc_hash[i]) {
+ if (atm_sk(s) == vcc) {
+ sock_hold(s);
+ read_unlock(&vcc_sklist_lock);
+ return vcc;
+ }
+ }
+ }
+ read_unlock(&vcc_sklist_lock);
+ return NULL;
+}
+
static void sigd_put_skb(struct sk_buff *skb)
{
if (!sigd) {
@@ -69,7 +99,14 @@ static int sigd_send(struct atm_vcc *vcc, struct sk_buff *skb)

msg = (struct atmsvc_msg *) skb->data;
WARN_ON(refcount_sub_and_test(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc));
- vcc = *(struct atm_vcc **) &msg->vcc;
+
+ vcc = find_get_vcc(*(struct atm_vcc **)&msg->vcc);
+ if (!vcc) {
+ pr_debug("invalid vcc pointer in msg\n");
+ dev_kfree_skb(skb);
+ return -EINVAL;
+ }
+
pr_debug("%d (0x%lx)\n", (int)msg->type, (unsigned long)vcc);
sk = sk_atm(vcc);

@@ -100,7 +137,16 @@ static int sigd_send(struct atm_vcc *vcc, struct sk_buff *skb)
clear_bit(ATM_VF_WAITING, &vcc->flags);
break;
case as_indicate:
- vcc = *(struct atm_vcc **)&msg->listen_vcc;
+ /* Release the reference from msg->vcc, we'll use msg->listen_vcc instead */
+ sock_put(sk);
+
+ vcc = find_get_vcc(*(struct atm_vcc **)&msg->listen_vcc);
+ if (!vcc) {
+ pr_debug("invalid listen_vcc pointer in msg\n");
+ dev_kfree_skb(skb);
+ return -EINVAL;
+ }
+
sk = sk_atm(vcc);
pr_debug("as_indicate!!!\n");
lock_sock(sk);
@@ -115,6 +161,8 @@ static int sigd_send(struct atm_vcc *vcc, struct sk_buff *skb)
sk->sk_state_change(sk);
as_indicate_complete:
release_sock(sk);
+ /* Paired with find_get_vcc(msg->listen_vcc) above */
+ sock_put(sk);
return 0;
case as_close:
set_bit(ATM_VF_RELEASED, &vcc->flags);
@@ -131,11 +179,15 @@ static int sigd_send(struct atm_vcc *vcc, struct sk_buff *skb)
break;
default:
pr_alert("bad message type %d\n", (int)msg->type);
+ /* Paired with find_get_vcc(msg->vcc) above */
+ sock_put(sk);
return -EINVAL;
}
sk->sk_state_change(sk);
out:
dev_kfree_skb(skb);
+ /* Paired with find_get_vcc(msg->vcc) above */
+ sock_put(sk);
return 0;
}

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 22d12e545966..5855eb050208 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -244,14 +244,11 @@ br_multicast_port_vid_to_port_ctx(struct net_bridge_port *port, u16 vid)

lockdep_assert_held_once(&port->br->multicast_lock);

- if (!br_opt_get(port->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED))
- return NULL;
-
/* Take RCU to access the vlan. */
rcu_read_lock();

vlan = br_vlan_find(nbp_vlan_group_rcu(port), vid);
- if (vlan && !br_multicast_port_ctx_vlan_disabled(&vlan->port_mcast_ctx))
+ if (vlan)
pmctx = &vlan->port_mcast_ctx;

rcu_read_unlock();
@@ -701,7 +698,10 @@ br_multicast_port_ngroups_inc_one(struct net_bridge_mcast_port *pmctx,
u32 max = READ_ONCE(pmctx->mdb_max_entries);
u32 n = READ_ONCE(pmctx->mdb_n_entries);

- if (max && n >= max) {
+ /* enforce the max limit when it's a port pmctx or a port-vlan pmctx
+ * with snooping enabled
+ */
+ if (!br_multicast_port_ctx_vlan_disabled(pmctx) && max && n >= max) {
NL_SET_ERR_MSG_FMT_MOD(extack, "%s is already in %u groups, and mcast_max_groups=%u",
what, n, max);
return -E2BIG;
@@ -736,9 +736,7 @@ static int br_multicast_port_ngroups_inc(struct net_bridge_port *port,
return err;
}

- /* Only count on the VLAN context if VID is given, and if snooping on
- * that VLAN is enabled.
- */
+ /* Only count on the VLAN context if VID is given */
if (!group->vid)
return 0;

@@ -2011,6 +2009,18 @@ void br_multicast_port_ctx_init(struct net_bridge_port *port,
timer_setup(&pmctx->ip6_own_query.timer,
br_ip6_multicast_port_query_expired, 0);
#endif
+ /* initialize mdb_n_entries if a new port vlan is being created */
+ if (vlan) {
+ struct net_bridge_port_group *pg;
+ u32 n = 0;
+
+ spin_lock_bh(&port->br->multicast_lock);
+ hlist_for_each_entry(pg, &port->mglist, mglist)
+ if (pg->key.addr.vid == vlan->vid)
+ n++;
+ WRITE_ONCE(pmctx->mdb_n_entries, n);
+ spin_unlock_bh(&port->br->multicast_lock);
+ }
}

void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx)
@@ -2094,25 +2104,6 @@ static void __br_multicast_enable_port_ctx(struct net_bridge_mcast_port *pmctx)
br_ip4_multicast_add_router(brmctx, pmctx);
br_ip6_multicast_add_router(brmctx, pmctx);
}
-
- if (br_multicast_port_ctx_is_vlan(pmctx)) {
- struct net_bridge_port_group *pg;
- u32 n = 0;
-
- /* The mcast_n_groups counter might be wrong. First,
- * BR_VLFLAG_MCAST_ENABLED is toggled before temporary entries
- * are flushed, thus mcast_n_groups after the toggle does not
- * reflect the true values. And second, permanent entries added
- * while BR_VLFLAG_MCAST_ENABLED was disabled, are not reflected
- * either. Thus we have to refresh the counter.
- */
-
- hlist_for_each_entry(pg, &pmctx->port->mglist, mglist) {
- if (pg->key.addr.vid == pmctx->vlan->vid)
- n++;
- }
- WRITE_ONCE(pmctx->mdb_n_entries, n);
- }
}

static void br_multicast_enable_port_ctx(struct net_bridge_mcast_port *pmctx)
diff --git a/net/core/dev.c b/net/core/dev.c
index 5b536860138d..ff70c902a419 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -738,7 +738,7 @@ static struct net_device_path *dev_fwd_path(struct net_device_path_stack *stack)
{
int k = stack->num_paths++;

- if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX))
+ if (k >= NET_DEVICE_PATH_STACK_MAX)
return NULL;

return &stack->path[k];
diff --git a/net/core/filter.c b/net/core/filter.c
index 88b265f6ccf8..d93f7dea828e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4133,7 +4133,7 @@ static const struct bpf_func_proto bpf_xdp_store_bytes_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
- .arg3_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg4_type = ARG_CONST_SIZE,
};

@@ -6325,7 +6325,7 @@ static const struct bpf_func_proto bpf_xdp_fib_lookup_proto = {
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
- .arg2_type = ARG_PTR_TO_MEM,
+ .arg2_type = ARG_PTR_TO_MEM | MEM_WRITE,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_ANYTHING,
};
@@ -6380,7 +6380,7 @@ static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
- .arg2_type = ARG_PTR_TO_MEM,
+ .arg2_type = ARG_PTR_TO_MEM | MEM_WRITE,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_ANYTHING,
};
@@ -7934,9 +7934,9 @@ static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv4_proto = {
.gpl_only = true, /* __cookie_v4_init_sequence() is GPL */
.pkt_access = true,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_RDONLY,
.arg1_size = sizeof(struct iphdr),
- .arg2_type = ARG_PTR_TO_MEM,
+ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
};

@@ -7966,9 +7966,9 @@ static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv6_proto = {
.gpl_only = true, /* __cookie_v6_init_sequence() is GPL */
.pkt_access = true,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_RDONLY,
.arg1_size = sizeof(struct ipv6hdr),
- .arg2_type = ARG_PTR_TO_MEM,
+ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
};

@@ -7986,9 +7986,9 @@ static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv4_proto = {
.gpl_only = true, /* __cookie_v4_check is GPL */
.pkt_access = true,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_RDONLY,
.arg1_size = sizeof(struct iphdr),
- .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_RDONLY,
.arg2_size = sizeof(struct tcphdr),
};

@@ -8010,9 +8010,9 @@ static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv6_proto = {
.gpl_only = true, /* __cookie_v6_check is GPL */
.pkt_access = true,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_RDONLY,
.arg1_size = sizeof(struct ipv6hdr),
- .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_RDONLY,
.arg2_size = sizeof(struct tcphdr),
};
#endif /* CONFIG_SYN_COOKIES */
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 2ac7731e1e0a..ddde93dd8bc6 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -409,22 +409,26 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
}
EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);

-/* Receive sk_msg from psock->ingress_msg to @msg. */
-int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
- int len, int flags)
+int __sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
+ int len, int flags, int *copied_from_self)
{
struct iov_iter *iter = &msg->msg_iter;
int peek = flags & MSG_PEEK;
struct sk_msg *msg_rx;
int i, copied = 0;
+ bool from_self;

msg_rx = sk_psock_peek_msg(psock);
+ if (copied_from_self)
+ *copied_from_self = 0;
+
while (copied != len) {
struct scatterlist *sge;

if (unlikely(!msg_rx))
break;

+ from_self = msg_rx->sk == sk;
i = msg_rx->sg.start;
do {
struct page *page;
@@ -443,6 +447,9 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
}

copied += copy;
+ if (from_self && copied_from_self)
+ *copied_from_self += copy;
+
if (likely(!peek)) {
sge->offset += copy;
sge->length -= copy;
@@ -451,6 +458,7 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
atomic_sub(copy, &sk->sk_rmem_alloc);
}
msg_rx->sg.size -= copy;
+ sk_psock_msg_len_add(psock, -copy);

if (!sge->length) {
sk_msg_iter_var_next(i);
@@ -487,6 +495,13 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
out:
return copied;
}
+
+/* Receive sk_msg from psock->ingress_msg to @msg. */
+int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
+ int len, int flags)
+{
+ return __sk_msg_recvmsg(sk, psock, msg, len, flags, NULL);
+}
EXPORT_SYMBOL_GPL(sk_msg_recvmsg);

bool sk_msg_is_readable(struct sock *sk)
@@ -616,6 +631,12 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
if (unlikely(!msg))
return -EAGAIN;
skb_set_owner_r(skb, sk);
+
+ /* This is used in tcp_bpf_recvmsg_parser() to determine whether the
+ * data originates from the socket's own protocol stack. No need to
+ * refcount sk because msg's lifetime is bound to sk via the ingress_msg.
+ */
+ msg->sk = sk;
err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg, take_ref);
if (err < 0)
kfree(msg);
@@ -801,9 +822,11 @@ static void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
list_del(&msg->list);
if (!msg->skb)
atomic_sub(msg->sg.size, &psock->sk->sk_rmem_alloc);
+ sk_psock_msg_len_add(psock, -msg->sg.size);
sk_msg_free(psock->sk, msg);
kfree(msg);
}
+ WARN_ON_ONCE(psock->msg_tot_len);
}

static void __sk_psock_zap_ingress(struct sk_psock *psock)
@@ -909,6 +932,7 @@ int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
sk_msg_compute_data_pointers(msg);
msg->sk = sk;
ret = bpf_prog_run_pin_on_cpu(prog, msg);
+ msg->sk = NULL;
ret = sk_psock_map_verd(ret, msg->sk_redir);
psock->apply_bytes = msg->apply_bytes;
if (ret == __SK_REDIRECT) {
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 1b7fb5d935ed..3e19a5d465b8 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -248,7 +248,8 @@ bool icmp_global_allow(struct net *net)
if (delta < HZ / 50)
return false;

- incr = READ_ONCE(net->ipv4.sysctl_icmp_msgs_per_sec) * delta / HZ;
+ incr = READ_ONCE(net->ipv4.sysctl_icmp_msgs_per_sec);
+ incr = div_u64((u64)incr * delta, HZ);
if (!incr)
return false;

@@ -554,6 +555,21 @@ static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4,
/* steal dst entry from skb_in, don't drop refcnt */
skb_dstref_steal(skb_in);
skb_dstref_restore(skb_in, orefdst);
+
+ /*
+ * At this point, fl4_dec.daddr should NOT be local (we
+ * checked fl4_dec.saddr above). However, a race condition
+ * may occur if the address is added to the interface
+ * concurrently. In that case, ip_route_input() returns a
+ * LOCAL route with dst.output=ip_rt_bug, which must not
+ * be used for output.
+ */
+ if (!err && rt2 && rt2->rt_type == RTN_LOCAL) {
+ net_warn_ratelimited("detected local route for %pI4 during ICMP sending, src %pI4\n",
+ &fl4_dec.daddr, &fl4_dec.saddr);
+ dst_release(&rt2->dst);
+ err = -EINVAL;
+ }
}

if (err)
@@ -843,16 +859,22 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
/* Checkin full IP header plus 8 bytes of protocol to
* avoid additional coding at protocol handlers.
*/
- if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) {
- __ICMP_INC_STATS(dev_net_rcu(skb->dev), ICMP_MIB_INERRORS);
- return;
- }
+ if (!pskb_may_pull(skb, iph->ihl * 4 + 8))
+ goto out;
+
+ /* IPPROTO_RAW sockets are not supposed to receive anything. */
+ if (protocol == IPPROTO_RAW)
+ goto out;

raw_icmp_error(skb, protocol, info);

ipprot = rcu_dereference(inet_protos[protocol]);
if (ipprot && ipprot->err_handler)
ipprot->err_handler(skb, info);
+ return;
+
+out:
+ __ICMP_INC_STATS(dev_net_rcu(skb->dev), ICMP_MIB_INERRORS);
}

static bool icmp_tag_validation(int proto)
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index a5227d23bb0b..690f486173e0 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -148,7 +148,7 @@ void ping_unhash(struct sock *sk)
pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num);
spin_lock(&ping_table.lock);
if (sk_del_node_init_rcu(sk)) {
- isk->inet_num = 0;
+ WRITE_ONCE(isk->inet_num, 0);
isk->inet_sport = 0;
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
}
@@ -181,31 +181,35 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
}

sk_for_each_rcu(sk, hslot) {
+ int bound_dev_if;
+
if (!net_eq(sock_net(sk), net))
continue;
isk = inet_sk(sk);

pr_debug("iterate\n");
- if (isk->inet_num != ident)
+ if (READ_ONCE(isk->inet_num) != ident)
continue;

+ bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
if (skb->protocol == htons(ETH_P_IP) &&
sk->sk_family == AF_INET) {
+ __be32 rcv_saddr = READ_ONCE(isk->inet_rcv_saddr);
+
pr_debug("found: %p: num=%d, daddr=%pI4, dif=%d\n", sk,
- (int) isk->inet_num, &isk->inet_rcv_saddr,
- sk->sk_bound_dev_if);
+ ident, &rcv_saddr,
+ bound_dev_if);

- if (isk->inet_rcv_saddr &&
- isk->inet_rcv_saddr != ip_hdr(skb)->daddr)
+ if (rcv_saddr && rcv_saddr != ip_hdr(skb)->daddr)
continue;
#if IS_ENABLED(CONFIG_IPV6)
} else if (skb->protocol == htons(ETH_P_IPV6) &&
sk->sk_family == AF_INET6) {

pr_debug("found: %p: num=%d, daddr=%pI6c, dif=%d\n", sk,
- (int) isk->inet_num,
+ ident,
&sk->sk_v6_rcv_saddr,
- sk->sk_bound_dev_if);
+ bound_dev_if);

if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) &&
!ipv6_addr_equal(&sk->sk_v6_rcv_saddr,
@@ -216,8 +220,8 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
continue;
}

- if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif &&
- sk->sk_bound_dev_if != sdif)
+ if (bound_dev_if && bound_dev_if != dif &&
+ bound_dev_if != sdif)
continue;

goto exit;
@@ -392,7 +396,9 @@ static void ping_set_saddr(struct sock *sk, struct sockaddr *saddr)
if (saddr->sa_family == AF_INET) {
struct inet_sock *isk = inet_sk(sk);
struct sockaddr_in *addr = (struct sockaddr_in *) saddr;
- isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr;
+
+ isk->inet_saddr = addr->sin_addr.s_addr;
+ WRITE_ONCE(isk->inet_rcv_saddr, addr->sin_addr.s_addr);
#if IS_ENABLED(CONFIG_IPV6)
} else if (saddr->sa_family == AF_INET6) {
struct sockaddr_in6 *addr = (struct sockaddr_in6 *) saddr;
@@ -849,7 +855,8 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
struct sk_buff *skb;
int copied, err;

- pr_debug("ping_recvmsg(sk=%p,sk->num=%u)\n", isk, isk->inet_num);
+ pr_debug("ping_recvmsg(sk=%p,sk->num=%u)\n", isk,
+ READ_ONCE(isk->inet_num));

err = -EOPNOTSUPP;
if (flags & MSG_OOB)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 74079eab8980..e35825656e6e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -503,6 +503,9 @@ static void tcp_tx_timestamp(struct sock *sk, struct sockcm_cookie *sockc)
struct sk_buff *skb = tcp_write_queue_tail(sk);
u32 tsflags = sockc->tsflags;

+ if (unlikely(!skb))
+ skb = skb_rb_last(&sk->tcp_rtx_queue);
+
if (tsflags && skb) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index a268e1595b22..ca8a5cb8e569 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -10,6 +10,7 @@

#include <net/inet_common.h>
#include <net/tls.h>
+#include <asm/ioctls.h>

void tcp_eat_skb(struct sock *sk, struct sk_buff *skb)
{
@@ -226,6 +227,7 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
int peek = flags & MSG_PEEK;
struct sk_psock *psock;
struct tcp_sock *tcp;
+ int copied_from_self = 0;
int copied = 0;
u32 seq;

@@ -262,7 +264,7 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
}

msg_bytes_ready:
- copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
+ copied = __sk_msg_recvmsg(sk, psock, msg, len, flags, &copied_from_self);
/* The typical case for EFAULT is the socket was gracefully
* shutdown with a FIN pkt. So check here the other case is
* some error on copy_page_to_iter which would be unexpected.
@@ -277,7 +279,7 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
goto out;
}
}
- seq += copied;
+ seq += copied_from_self;
if (!copied) {
long timeo;
int data;
@@ -331,6 +333,24 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
return copied;
}

+static int tcp_bpf_ioctl(struct sock *sk, int cmd, int *karg)
+{
+ bool slow;
+
+ if (cmd != SIOCINQ)
+ return tcp_ioctl(sk, cmd, karg);
+
+ /* works similar as tcp_ioctl */
+ if (sk->sk_state == TCP_LISTEN)
+ return -EINVAL;
+
+ slow = lock_sock_fast(sk);
+ *karg = sk_psock_msg_inq(sk);
+ unlock_sock_fast(sk, slow);
+
+ return 0;
+}
+
static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int flags, int *addr_len)
{
@@ -609,6 +629,7 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
prot[TCP_BPF_BASE].close = sock_map_close;
prot[TCP_BPF_BASE].recvmsg = tcp_bpf_recvmsg;
prot[TCP_BPF_BASE].sock_is_readable = sk_msg_is_readable;
+ prot[TCP_BPF_BASE].ioctl = tcp_bpf_ioctl;

prot[TCP_BPF_TX] = prot[TCP_BPF_BASE];
prot[TCP_BPF_TX].sendmsg = tcp_bpf_sendmsg;
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index df758adbb445..e9f6c77e0631 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -16,6 +16,7 @@
#include <linux/gfp.h>
#include <linux/jhash.h>
#include <net/tcp.h>
+#include <net/tcp_ecn.h>
#include <trace/events/tcp.h>

static DEFINE_SPINLOCK(tcp_cong_list_lock);
@@ -227,7 +228,7 @@ void tcp_assign_congestion_control(struct sock *sk)

memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
if (ca->flags & TCP_CONG_NEEDS_ECN)
- INET_ECN_xmit(sk);
+ INET_ECN_xmit_ect_1_negotiation(sk);
else
INET_ECN_dontxmit(sk);
}
@@ -257,7 +258,7 @@ static void tcp_reinit_congestion_control(struct sock *sk,
memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));

if (ca->flags & TCP_CONG_NEEDS_ECN)
- INET_ECN_xmit(sk);
+ INET_ECN_xmit_ect_1_negotiation(sk);
else
INET_ECN_dontxmit(sk);

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e4a979b75cc6..ede266463d5d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6817,7 +6817,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
tp->max_window = tp->snd_wnd;

- tcp_ecn_rcv_syn(tp, th, skb);
+ tcp_ecn_rcv_syn(sk, th, skb);

tcp_mtup_init(sk);
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
@@ -7222,7 +7222,8 @@ static void tcp_ecn_create_request(struct request_sock *req,
u32 ecn_ok_dst;

if (tcp_accecn_syn_requested(th) &&
- READ_ONCE(net->ipv4.sysctl_tcp_ecn) >= 3) {
+ (READ_ONCE(net->ipv4.sysctl_tcp_ecn) >= 3 ||
+ tcp_ca_needs_accecn(listen_sk))) {
inet_rsk(req)->ecn_ok = 1;
tcp_rsk(req)->accecn_ok = 1;
tcp_rsk(req)->syn_ect_rcv = TCP_SKB_CB(skb)->ip_dsfield &
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 2ec8c6f1cdcc..1fade94813c6 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -488,9 +488,10 @@ static void tcp_ecn_openreq_child(struct sock *sk,
tp->accecn_opt_demand = 1;
tcp_ecn_received_counters_payload(sk, skb);
} else {
- tcp_ecn_mode_set(tp, inet_rsk(req)->ecn_ok ?
- TCP_ECN_MODE_RFC3168 :
- TCP_ECN_DISABLED);
+ if (inet_rsk(req)->ecn_ok && !tcp_ca_no_fallback_rfc3168(sk))
+ tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
+ else
+ tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
}
}

diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index 0735d820e413..91233e37cd97 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -5,6 +5,7 @@
#include <net/sock.h>
#include <net/udp.h>
#include <net/inet_common.h>
+#include <asm/ioctls.h>

#include "udp_impl.h"

@@ -111,12 +112,26 @@ enum {
static DEFINE_SPINLOCK(udpv6_prot_lock);
static struct proto udp_bpf_prots[UDP_BPF_NUM_PROTS];

+static int udp_bpf_ioctl(struct sock *sk, int cmd, int *karg)
+{
+ if (cmd != SIOCINQ)
+ return udp_ioctl(sk, cmd, karg);
+
+ /* Since we don't hold a lock, sk_receive_queue may contain data.
+ * BPF might only be processing this data at the moment. We only
+ * care about the data in the ingress_msg here.
+ */
+ *karg = sk_msg_first_len(sk);
+ return 0;
+}
+
static void udp_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
{
- *prot = *base;
- prot->close = sock_map_close;
- prot->recvmsg = udp_bpf_recvmsg;
- prot->sock_is_readable = sk_msg_is_readable;
+ *prot = *base;
+ prot->close = sock_map_close;
+ prot->recvmsg = udp_bpf_recvmsg;
+ prot->sock_is_readable = sk_msg_is_readable;
+ prot->ioctl = udp_bpf_ioctl;
}

static void udp_bpf_check_v6_needs_rebuild(struct proto *ops)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 1b0314644e0c..0e8f48835869 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -955,7 +955,7 @@ static int __net_init inet6_net_init(struct net *net)
int err = 0;

net->ipv6.sysctl.bindv6only = 0;
- net->ipv6.sysctl.icmpv6_time = 1*HZ;
+ net->ipv6.sysctl.icmpv6_time = HZ / 10;
net->ipv6.sysctl.icmpv6_echo_ignore_all = 0;
net->ipv6.sysctl.icmpv6_echo_ignore_multicast = 0;
net->ipv6.sysctl.icmpv6_echo_ignore_anycast = 0;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index cf6455cbe2cc..35b32dcf581f 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -217,14 +217,9 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
} else if (dev && (dev->flags & IFF_LOOPBACK)) {
res = true;
} else {
- struct rt6_info *rt = dst_rt6_info(dst);
- int tmo = net->ipv6.sysctl.icmpv6_time;
+ int tmo = READ_ONCE(net->ipv6.sysctl.icmpv6_time);
struct inet_peer *peer;

- /* Give more bandwidth to wider prefixes. */
- if (rt->rt6i_dst.plen < 128)
- tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
-
peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr);
res = inet_peer_xrlim_allow(peer, tmo);
}
@@ -870,6 +865,12 @@ enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type,
if (reason != SKB_NOT_DROPPED_YET)
goto out;

+ if (nexthdr == IPPROTO_RAW) {
+ /* Add a more specific reason later ? */
+ reason = SKB_DROP_REASON_NOT_SPECIFIED;
+ goto out;
+ }
+
/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
Without this we will not able f.e. to make source routed
pmtu discovery.
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index c6439e30e892..cc149227b49f 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1139,7 +1139,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
fib6_add_gc_list(iter);
}
if (!(rt->fib6_flags & (RTF_ADDRCONF | RTF_PREFIX_RT)) &&
- !iter->fib6_nh->fib_nh_gw_family) {
+ (iter->nh || !iter->fib6_nh->fib_nh_gw_family)) {
iter->fib6_flags &= ~RTF_ADDRCONF;
iter->fib6_flags &= ~RTF_PREFIX_RT;
}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index e4bb7e2d7b19..8f18509204b6 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -224,9 +224,6 @@ static bool mptcp_rcvbuf_grow(struct sock *sk, u32 newval)
do_div(grow, oldval);
rcvwin += grow << 1;

- if (!RB_EMPTY_ROOT(&msk->out_of_order_queue))
- rcvwin += MPTCP_SKB_CB(msk->ooo_last_skb)->end_seq - msk->ack_seq;
-
cap = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]);

rcvbuf = min_t(u32, mptcp_space_from_win(sk, rcvwin), cap);
@@ -350,9 +347,6 @@ static void mptcp_data_queue_ofo(struct mptcp_sock *msk, struct sk_buff *skb)
end:
skb_condense(skb);
skb_set_owner_r(skb, sk);
- /* do not grow rcvbuf for not-yet-accepted or orphaned sockets. */
- if (sk->sk_socket)
- mptcp_rcvbuf_grow(sk, msk->rcvq_space.space);
}

static void mptcp_init_skb(struct sock *ssk, struct sk_buff *skb, int offset,
@@ -2062,8 +2056,8 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)

msk->rcvq_space.copied += copied;

- mstamp = div_u64(tcp_clock_ns(), NSEC_PER_USEC);
- time = tcp_stamp_us_delta(mstamp, msk->rcvq_space.time);
+ mstamp = mptcp_stamp();
+ time = tcp_stamp_us_delta(mstamp, READ_ONCE(msk->rcvq_space.time));

rtt_us = msk->rcvq_space.rtt_us;
if (rtt_us && time < (rtt_us >> 3))
@@ -3427,6 +3421,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
__mptcp_propagate_sndbuf(nsk, ssk);

mptcp_rcv_space_init(msk, ssk);
+ msk->rcvq_space.time = mptcp_stamp();

if (mp_opt->suboptions & OPTION_MPTCP_MPC_ACK)
__mptcp_subflow_fully_established(msk, subflow, mp_opt);
@@ -3444,8 +3439,6 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
msk->rcvq_space.copied = 0;
msk->rcvq_space.rtt_us = 0;

- msk->rcvq_space.time = tp->tcp_mstamp;
-
/* initial rcv_space offering made to peer */
msk->rcvq_space.space = min_t(u32, tp->rcv_wnd,
TCP_INIT_CWND * tp->advmss);
@@ -3658,6 +3651,7 @@ void mptcp_finish_connect(struct sock *ssk)
* accessing the field below
*/
WRITE_ONCE(msk->local_key, subflow->local_key);
+ WRITE_ONCE(msk->rcvq_space.time, mptcp_stamp());

mptcp_pm_new_connection(msk, ssk, 0);
}
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 30d5e5719793..27b1698c5aa2 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -870,6 +870,11 @@ static inline bool mptcp_is_fully_established(struct sock *sk)
READ_ONCE(mptcp_sk(sk)->fully_established);
}

+static inline u64 mptcp_stamp(void)
+{
+ return div_u64(tcp_clock_ns(), NSEC_PER_USEC);
+}
+
void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk);
void mptcp_data_ready(struct sock *sk, struct sock *ssk);
bool mptcp_finish_join(struct sock *sk);
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 618fbe1240b5..ecbcdc43263d 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -295,6 +295,12 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs,
return true;
}

+/* rt has device that is down */
+static bool rt_dev_is_down(const struct net_device *dev)
+{
+ return dev && !netif_running(dev);
+}
+
/* Get route to destination or remote server */
static int
__ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
@@ -310,9 +316,11 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,

if (dest) {
dest_dst = __ip_vs_dst_check(dest);
- if (likely(dest_dst))
+ if (likely(dest_dst)) {
rt = dst_rtable(dest_dst->dst_cache);
- else {
+ if (ret_saddr)
+ *ret_saddr = dest_dst->dst_saddr.ip;
+ } else {
dest_dst = ip_vs_dest_dst_alloc();
spin_lock_bh(&dest->dst_lock);
if (!dest_dst) {
@@ -328,14 +336,22 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
ip_vs_dest_dst_free(dest_dst);
goto err_unreach;
}
- __ip_vs_dst_set(dest, dest_dst, &rt->dst, 0);
+ /* It is forbidden to attach dest->dest_dst if
+ * device is going down.
+ */
+ if (!rt_dev_is_down(dst_dev_rcu(&rt->dst)))
+ __ip_vs_dst_set(dest, dest_dst, &rt->dst, 0);
+ else
+ noref = 0;
spin_unlock_bh(&dest->dst_lock);
IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n",
&dest->addr.ip, &dest_dst->dst_saddr.ip,
rcuref_read(&rt->dst.__rcuref));
+ if (ret_saddr)
+ *ret_saddr = dest_dst->dst_saddr.ip;
+ if (!noref)
+ ip_vs_dest_dst_free(dest_dst);
}
- if (ret_saddr)
- *ret_saddr = dest_dst->dst_saddr.ip;
} else {
noref = 0;

@@ -472,9 +488,11 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,

if (dest) {
dest_dst = __ip_vs_dst_check(dest);
- if (likely(dest_dst))
+ if (likely(dest_dst)) {
rt = dst_rt6_info(dest_dst->dst_cache);
- else {
+ if (ret_saddr)
+ *ret_saddr = dest_dst->dst_saddr.in6;
+ } else {
u32 cookie;

dest_dst = ip_vs_dest_dst_alloc();
@@ -495,14 +513,22 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
}
rt = dst_rt6_info(dst);
cookie = rt6_get_cookie(rt);
- __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie);
+ /* It is forbidden to attach dest->dest_dst if
+ * device is going down.
+ */
+ if (!rt_dev_is_down(dst_dev_rcu(&rt->dst)))
+ __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie);
+ else
+ noref = 0;
spin_unlock_bh(&dest->dst_lock);
IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
&dest->addr.in6, &dest_dst->dst_saddr.in6,
rcuref_read(&rt->dst.__rcuref));
+ if (ret_saddr)
+ *ret_saddr = dest_dst->dst_saddr.in6;
+ if (!noref)
+ ip_vs_dest_dst_free(dest_dst);
}
- if (ret_saddr)
- *ret_saddr = dest_dst->dst_saddr.in6;
} else {
noref = 0;
dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm,
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 828d5c64c68a..14e62b3263cd 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -34,8 +34,9 @@

#define CONNCOUNT_SLOTS 256U

-#define CONNCOUNT_GC_MAX_NODES 8
-#define MAX_KEYLEN 5
+#define CONNCOUNT_GC_MAX_NODES 8
+#define CONNCOUNT_GC_MAX_COLLECT 64
+#define MAX_KEYLEN 5

/* we will save the tuples of all connections we care about */
struct nf_conncount_tuple {
@@ -178,16 +179,28 @@ static int __nf_conncount_add(struct net *net,
return -ENOENT;

if (ct && nf_ct_is_confirmed(ct)) {
- err = -EEXIST;
- goto out_put;
+ /* local connections are confirmed in postrouting so confirmation
+ * might have happened before hitting connlimit
+ */
+ if (skb->skb_iif != LOOPBACK_IFINDEX) {
+ err = -EEXIST;
+ goto out_put;
+ }
+
+ /* this is likely a local connection, skip optimization to avoid
+ * adding duplicates from a 'packet train'
+ */
+ goto check_connections;
}

- if ((u32)jiffies == list->last_gc)
+ if ((u32)jiffies == list->last_gc &&
+ (list->count - list->last_gc_count) < CONNCOUNT_GC_MAX_COLLECT)
goto add_new_node;

+check_connections:
/* check the saved connections */
list_for_each_entry_safe(conn, conn_n, &list->head, node) {
- if (collect > CONNCOUNT_GC_MAX_NODES)
+ if (collect > CONNCOUNT_GC_MAX_COLLECT)
break;

found = find_or_evict(net, list, conn);
@@ -230,6 +243,7 @@ static int __nf_conncount_add(struct net *net,
nf_ct_put(found_ct);
}
list->last_gc = (u32)jiffies;
+ list->last_gc_count = list->count;

add_new_node:
if (WARN_ON_ONCE(list->count > INT_MAX)) {
@@ -277,13 +291,14 @@ void nf_conncount_list_init(struct nf_conncount_list *list)
spin_lock_init(&list->list_lock);
INIT_LIST_HEAD(&list->head);
list->count = 0;
+ list->last_gc_count = 0;
list->last_gc = (u32)jiffies;
}
EXPORT_SYMBOL_GPL(nf_conncount_list_init);

/* Return true if the list is empty. Must be called with BH disabled. */
-bool nf_conncount_gc_list(struct net *net,
- struct nf_conncount_list *list)
+static bool __nf_conncount_gc_list(struct net *net,
+ struct nf_conncount_list *list)
{
const struct nf_conntrack_tuple_hash *found;
struct nf_conncount_tuple *conn, *conn_n;
@@ -295,10 +310,6 @@ bool nf_conncount_gc_list(struct net *net,
if ((u32)jiffies == READ_ONCE(list->last_gc))
return false;

- /* don't bother if other cpu is already doing GC */
- if (!spin_trylock(&list->list_lock))
- return false;
-
list_for_each_entry_safe(conn, conn_n, &list->head, node) {
found = find_or_evict(net, list, conn);
if (IS_ERR(found)) {
@@ -320,14 +331,29 @@ bool nf_conncount_gc_list(struct net *net,
}

nf_ct_put(found_ct);
- if (collected > CONNCOUNT_GC_MAX_NODES)
+ if (collected > CONNCOUNT_GC_MAX_COLLECT)
break;
}

if (!list->count)
ret = true;
list->last_gc = (u32)jiffies;
- spin_unlock(&list->list_lock);
+ list->last_gc_count = list->count;
+
+ return ret;
+}
+
+bool nf_conncount_gc_list(struct net *net,
+ struct nf_conncount_list *list)
+{
+ bool ret;
+
+ /* don't bother if other cpu is already doing GC */
+ if (!spin_trylock_bh(&list->list_lock))
+ return false;
+
+ ret = __nf_conncount_gc_list(net, list);
+ spin_unlock_bh(&list->list_lock);

return ret;
}
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 14f73872f647..e35814d68ce3 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -1186,13 +1186,13 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
{
struct net *net = nf_ct_net(ct);
struct nf_conntrack_expect *exp;
- struct nf_conntrack_tuple tuple;
+ struct nf_conntrack_tuple tuple = {
+ .src.l3num = nf_ct_l3num(ct),
+ .dst.protonum = IPPROTO_TCP,
+ .dst.u.tcp.port = port,
+ };

- memset(&tuple.src.u3, 0, sizeof(tuple.src.u3));
- tuple.src.u.tcp.port = 0;
memcpy(&tuple.dst.u3, addr, sizeof(tuple.dst.u3));
- tuple.dst.u.tcp.port = port;
- tuple.dst.protonum = IPPROTO_TCP;

exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple);
if (exp && exp->master == ct)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 6059a299004d..df367638cdef 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2822,6 +2822,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 policy,

err_register_hook:
nft_chain_del(chain);
+ synchronize_rcu();
err_chain_add:
nft_trans_destroy(trans);
err_trans:
@@ -7271,7 +7272,8 @@ static u32 nft_set_maxsize(const struct nft_set *set)
}

static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
- const struct nlattr *attr, u32 nlmsg_flags)
+ const struct nlattr *attr, u32 nlmsg_flags,
+ bool last)
{
struct nft_expr *expr_array[NFT_SET_EXPR_MAX] = {};
struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
@@ -7557,6 +7559,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (flags)
*nft_set_ext_flags(ext) = flags;

+ if (last)
+ elem.flags = NFT_SET_ELEM_INTERNAL_LAST;
+ else
+ elem.flags = 0;
+
if (obj)
*nft_set_ext_obj(ext) = obj;

@@ -7637,6 +7644,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
* and an existing one.
*/
err = -EEXIST;
+ } else if (err == -ECANCELED) {
+ /* ECANCELED reports an existing nul-element in
+ * interval sets.
+ */
+ err = 0;
}
goto err_element_clash;
}
@@ -7715,7 +7727,8 @@ static int nf_tables_newsetelem(struct sk_buff *skb,
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);

nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
- err = nft_add_set_elem(&ctx, set, attr, info->nlh->nlmsg_flags);
+ err = nft_add_set_elem(&ctx, set, attr, info->nlh->nlmsg_flags,
+ nla_is_last(attr, rem));
if (err < 0) {
NL_SET_BAD_ATTR(extack, attr);
return err;
@@ -7838,7 +7851,7 @@ static void nft_trans_elems_destroy_abort(const struct nft_ctx *ctx,
}

static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
- const struct nlattr *attr)
+ const struct nlattr *attr, bool last)
{
struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
struct nft_set_ext_tmpl tmpl;
@@ -7906,6 +7919,11 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
if (flags)
*nft_set_ext_flags(ext) = flags;

+ if (last)
+ elem.flags = NFT_SET_ELEM_INTERNAL_LAST;
+ else
+ elem.flags = 0;
+
trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set);
if (trans == NULL)
goto fail_trans;
@@ -8053,7 +8071,8 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
return nft_set_flush(&ctx, set, genmask);

nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
- err = nft_del_setelem(&ctx, set, attr);
+ err = nft_del_setelem(&ctx, set, attr,
+ nla_is_last(attr, rem));
if (err == -ENOENT &&
NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYSETELEM)
continue;
@@ -11538,6 +11557,13 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb,
ret = __nf_tables_abort(net, action);
nft_gc_seq_end(nft_net, gc_seq);

+ if (action == NFNL_ABORT_NONE) {
+ struct nft_table *table;
+
+ list_for_each_entry(table, &nft_net->tables, list)
+ table->validate_state = NFT_VALIDATE_SKIP;
+ }
+
WARN_ON_ONCE(!list_empty(&nft_net->commit_list));

/* module autoload needs to happen after GC sequence update because it
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 8b7b39d8a109..34548213f2f1 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -30,6 +30,8 @@
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/list.h>
#include <linux/cgroup-defs.h>
+#include <linux/rhashtable.h>
+#include <linux/jhash.h>
#include <net/gso.h>
#include <net/sock.h>
#include <net/tcp_states.h>
@@ -47,6 +49,8 @@
#endif

#define NFQNL_QMAX_DEFAULT 1024
+#define NFQNL_HASH_MIN 1024
+#define NFQNL_HASH_MAX 1048576

/* We're using struct nlattr which has 16bit nla_len. Note that nla_len
* includes the header length. Thus, the maximum packet length that we
@@ -56,6 +60,26 @@
*/
#define NFQNL_MAX_COPY_RANGE (0xffff - NLA_HDRLEN)

+/* Composite key for packet lookup: (net, queue_num, packet_id) */
+struct nfqnl_packet_key {
+ possible_net_t net;
+ u32 packet_id;
+ u16 queue_num;
+} __aligned(sizeof(u32)); /* jhash2 requires 32-bit alignment */
+
+/* Global rhashtable - one for entire system, all netns */
+static struct rhashtable nfqnl_packet_map __read_mostly;
+
+/* Helper to initialize composite key */
+static inline void nfqnl_init_key(struct nfqnl_packet_key *key,
+ struct net *net, u32 packet_id, u16 queue_num)
+{
+ memset(key, 0, sizeof(*key));
+ write_pnet(&key->net, net);
+ key->packet_id = packet_id;
+ key->queue_num = queue_num;
+}
+
struct nfqnl_instance {
struct hlist_node hlist; /* global list of queues */
struct rcu_head rcu;
@@ -100,6 +124,39 @@ static inline u_int8_t instance_hashfn(u_int16_t queue_num)
return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS;
}

+/* Extract composite key from nf_queue_entry for hashing */
+static u32 nfqnl_packet_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+ const struct nf_queue_entry *entry = data;
+ struct nfqnl_packet_key key;
+
+ nfqnl_init_key(&key, entry->state.net, entry->id, entry->queue_num);
+
+ return jhash2((u32 *)&key, sizeof(key) / sizeof(u32), seed);
+}
+
+/* Compare stack-allocated key against entry */
+static int nfqnl_packet_obj_cmpfn(struct rhashtable_compare_arg *arg,
+ const void *obj)
+{
+ const struct nfqnl_packet_key *key = arg->key;
+ const struct nf_queue_entry *entry = obj;
+
+ return !net_eq(entry->state.net, read_pnet(&key->net)) ||
+ entry->queue_num != key->queue_num ||
+ entry->id != key->packet_id;
+}
+
+static const struct rhashtable_params nfqnl_rhashtable_params = {
+ .head_offset = offsetof(struct nf_queue_entry, hash_node),
+ .key_len = sizeof(struct nfqnl_packet_key),
+ .obj_hashfn = nfqnl_packet_obj_hashfn,
+ .obj_cmpfn = nfqnl_packet_obj_cmpfn,
+ .automatic_shrinking = true,
+ .min_size = NFQNL_HASH_MIN,
+ .max_size = NFQNL_HASH_MAX,
+};
+
static struct nfqnl_instance *
instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num)
{
@@ -191,33 +248,45 @@ instance_destroy(struct nfnl_queue_net *q, struct nfqnl_instance *inst)
spin_unlock(&q->instances_lock);
}

-static inline void
+static int
__enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
{
- list_add_tail(&entry->list, &queue->queue_list);
- queue->queue_total++;
+ int err;
+
+ entry->queue_num = queue->queue_num;
+
+ err = rhashtable_insert_fast(&nfqnl_packet_map, &entry->hash_node,
+ nfqnl_rhashtable_params);
+ if (unlikely(err))
+ return err;
+
+ list_add_tail(&entry->list, &queue->queue_list);
+ queue->queue_total++;
+
+ return 0;
}

static void
__dequeue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
{
+ rhashtable_remove_fast(&nfqnl_packet_map, &entry->hash_node,
+ nfqnl_rhashtable_params);
list_del(&entry->list);
queue->queue_total--;
}

static struct nf_queue_entry *
-find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
+find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id,
+ struct net *net)
{
- struct nf_queue_entry *entry = NULL, *i;
+ struct nfqnl_packet_key key;
+ struct nf_queue_entry *entry;

- spin_lock_bh(&queue->lock);
+ nfqnl_init_key(&key, net, id, queue->queue_num);

- list_for_each_entry(i, &queue->queue_list, list) {
- if (i->id == id) {
- entry = i;
- break;
- }
- }
+ spin_lock_bh(&queue->lock);
+ entry = rhashtable_lookup_fast(&nfqnl_packet_map, &key,
+ nfqnl_rhashtable_params);

if (entry)
__dequeue_entry(queue, entry);
@@ -369,6 +438,34 @@ static void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
nf_queue_entry_free(entry);
}

+/* return true if the entry has an unconfirmed conntrack attached that isn't owned by us
+ * exclusively.
+ */
+static bool nf_ct_drop_unconfirmed(const struct nf_queue_entry *entry, bool *is_unconfirmed)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ struct nf_conn *ct = (void *)skb_nfct(entry->skb);
+
+ if (!ct || nf_ct_is_confirmed(ct))
+ return false;
+
+ if (is_unconfirmed)
+ *is_unconfirmed = true;
+
+ /* in some cases skb_clone() can occur after initial conntrack
+ * pickup, but conntrack assumes exclusive skb->_nfct ownership for
+ * unconfirmed entries.
+ *
+ * This happens for br_netfilter and with ip multicast routing.
+ * This can't be solved with serialization here because one clone
+ * could have been queued for local delivery or could be transmitted
+ * in parallel on another CPU.
+ */
+ return refcount_read(&ct->ct_general.use) > 1;
+#endif
+ return false;
+}
+
static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict)
{
const struct nf_ct_hook *ct_hook;
@@ -396,6 +493,24 @@ static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict)
break;
}
}
+
+ if (verdict != NF_DROP && entry->nf_ct_is_unconfirmed) {
+ /* If first queued segment was already reinjected then
+ * there is a good chance the ct entry is now confirmed.
+ *
+ * Handle the rare cases:
+ * - out-of-order verdict
+ * - threaded userspace reinjecting in parallel
+ * - first segment was dropped
+ *
+ * In all of those cases we can't handle this packet
+ * because we can't be sure that another CPU won't modify
+ * nf_conn->ext in parallel which isn't allowed.
+ */
+ if (nf_ct_drop_unconfirmed(entry, NULL))
+ verdict = NF_DROP;
+ }
+
nf_reinject(entry, verdict);
}

@@ -407,8 +522,7 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
spin_lock_bh(&queue->lock);
list_for_each_entry_safe(entry, next, &queue->queue_list, list) {
if (!cmpfn || cmpfn(entry, data)) {
- list_del(&entry->list);
- queue->queue_total--;
+ __dequeue_entry(queue, entry);
nfqnl_reinject(entry, NF_DROP);
}
}
@@ -826,49 +940,6 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
return NULL;
}

-static bool nf_ct_drop_unconfirmed(const struct nf_queue_entry *entry)
-{
-#if IS_ENABLED(CONFIG_NF_CONNTRACK)
- static const unsigned long flags = IPS_CONFIRMED | IPS_DYING;
- struct nf_conn *ct = (void *)skb_nfct(entry->skb);
- unsigned long status;
- unsigned int use;
-
- if (!ct)
- return false;
-
- status = READ_ONCE(ct->status);
- if ((status & flags) == IPS_DYING)
- return true;
-
- if (status & IPS_CONFIRMED)
- return false;
-
- /* in some cases skb_clone() can occur after initial conntrack
- * pickup, but conntrack assumes exclusive skb->_nfct ownership for
- * unconfirmed entries.
- *
- * This happens for br_netfilter and with ip multicast routing.
- * We can't be solved with serialization here because one clone could
- * have been queued for local delivery.
- */
- use = refcount_read(&ct->ct_general.use);
- if (likely(use == 1))
- return false;
-
- /* Can't decrement further? Exclusive ownership. */
- if (!refcount_dec_not_one(&ct->ct_general.use))
- return false;
-
- skb_set_nfct(entry->skb, 0);
- /* No nf_ct_put(): we already decremented .use and it cannot
- * drop down to 0.
- */
- return true;
-#endif
- return false;
-}
-
static int
__nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
struct nf_queue_entry *entry)
@@ -885,26 +956,23 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
}
spin_lock_bh(&queue->lock);

- if (nf_ct_drop_unconfirmed(entry))
- goto err_out_free_nskb;
+ if (queue->queue_total >= queue->queue_maxlen)
+ goto err_out_queue_drop;

- if (queue->queue_total >= queue->queue_maxlen) {
- if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
- failopen = 1;
- err = 0;
- } else {
- queue->queue_dropped++;
- net_warn_ratelimited("nf_queue: full at %d entries, dropping packets(s)\n",
- queue->queue_total);
- }
- goto err_out_free_nskb;
- }
entry->id = ++queue->id_sequence;
*packet_id_ptr = htonl(entry->id);

+ /* Insert into hash BEFORE unicast. If insertion fails, don't send to userspace. */
+ err = __enqueue_entry(queue, entry);
+ if (unlikely(err))
+ goto err_out_queue_drop;
+
/* nfnetlink_unicast will either free the nskb or add it to a socket */
err = nfnetlink_unicast(nskb, net, queue->peer_portid);
if (err < 0) {
+ /* Unicast failed - remove entry we just inserted */
+ __dequeue_entry(queue, entry);
+
if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
failopen = 1;
err = 0;
@@ -914,12 +982,22 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
goto err_out_unlock;
}

- __enqueue_entry(queue, entry);
-
spin_unlock_bh(&queue->lock);
return 0;

-err_out_free_nskb:
+err_out_queue_drop:
+ if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
+ failopen = 1;
+ err = 0;
+ } else {
+ queue->queue_dropped++;
+
+ if (queue->queue_total >= queue->queue_maxlen)
+ net_warn_ratelimited("nf_queue: full at %d entries, dropping packets(s)\n",
+ queue->queue_total);
+ else
+ net_warn_ratelimited("nf_queue: hash insert failed: %d\n", err);
+ }
kfree_skb(nskb);
err_out_unlock:
spin_unlock_bh(&queue->lock);
@@ -998,9 +1076,10 @@ __nfqnl_enqueue_packet_gso(struct net *net, struct nfqnl_instance *queue,
static int
nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
{
- unsigned int queued;
- struct nfqnl_instance *queue;
struct sk_buff *skb, *segs, *nskb;
+ bool ct_is_unconfirmed = false;
+ struct nfqnl_instance *queue;
+ unsigned int queued;
int err = -ENOBUFS;
struct net *net = entry->state.net;
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
@@ -1024,6 +1103,15 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
break;
}

+ /* Check if someone already holds another reference to
+ * unconfirmed ct. If so, we cannot queue the skb:
+ * concurrent modifications of nf_conn->ext are not
+ * allowed and we can't know if another CPU isn't
+ * processing the same nf_conn entry in parallel.
+ */
+ if (nf_ct_drop_unconfirmed(entry, &ct_is_unconfirmed))
+ return -EINVAL;
+
if (!skb_is_gso(skb) || ((queue->flags & NFQA_CFG_F_GSO) && !skb_is_gso_sctp(skb)))
return __nfqnl_enqueue_packet(net, queue, entry);

@@ -1037,7 +1125,23 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
goto out_err;
queued = 0;
err = 0;
+
skb_list_walk_safe(segs, segs, nskb) {
+ if (ct_is_unconfirmed && queued > 0) {
+ /* skb_gso_segment() increments the ct refcount.
+ * This is a problem for unconfirmed (not in hash)
+ * entries, those can race when reinjections happen
+ * in parallel.
+ *
+ * Annotate this for all queued entries except the
+ * first one.
+ *
+ * As long as the first one is reinjected first it
+ * will do the confirmation for us.
+ */
+ entry->nf_ct_is_unconfirmed = ct_is_unconfirmed;
+ }
+
if (err == 0)
err = __nfqnl_enqueue_packet_gso(net, queue,
segs, entry);
@@ -1430,7 +1534,7 @@ static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info,

verdict = ntohl(vhdr->verdict);

- entry = find_dequeue_entry(queue, ntohl(vhdr->id));
+ entry = find_dequeue_entry(queue, ntohl(vhdr->id), info->net);
if (entry == NULL)
return -ENOENT;

@@ -1781,10 +1885,14 @@ static int __init nfnetlink_queue_init(void)
{
int status;

+ status = rhashtable_init(&nfqnl_packet_map, &nfqnl_rhashtable_params);
+ if (status < 0)
+ return status;
+
status = register_pernet_subsys(&nfnl_queue_net_ops);
if (status < 0) {
pr_err("failed to register pernet ops\n");
- goto out;
+ goto cleanup_rhashtable;
}

netlink_register_notifier(&nfqnl_rtnl_notifier);
@@ -1809,7 +1917,8 @@ static int __init nfnetlink_queue_init(void)
cleanup_netlink_notifier:
netlink_unregister_notifier(&nfqnl_rtnl_notifier);
unregister_pernet_subsys(&nfnl_queue_net_ops);
-out:
+cleanup_rhashtable:
+ rhashtable_destroy(&nfqnl_packet_map);
return status;
}

@@ -1821,6 +1930,8 @@ static void __exit nfnetlink_queue_fini(void)
netlink_unregister_notifier(&nfqnl_rtnl_notifier);
unregister_pernet_subsys(&nfnl_queue_net_ops);

+ rhashtable_destroy(&nfqnl_packet_map);
+
rcu_barrier(); /* Wait for completion of call_rcu()'s */
}

diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 72711d62fddf..08f620311b03 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -134,7 +134,8 @@ static void nft_target_eval_bridge(const struct nft_expr *expr,
}

static const struct nla_policy nft_target_policy[NFTA_TARGET_MAX + 1] = {
- [NFTA_TARGET_NAME] = { .type = NLA_NUL_STRING },
+ [NFTA_TARGET_NAME] = { .type = NLA_NUL_STRING,
+ .len = XT_EXTENSION_MAXNAMELEN, },
[NFTA_TARGET_REV] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_TARGET_INFO] = { .type = NLA_BINARY },
};
@@ -434,7 +435,8 @@ static void nft_match_eval(const struct nft_expr *expr,
}

static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = {
- [NFTA_MATCH_NAME] = { .type = NLA_NUL_STRING },
+ [NFTA_MATCH_NAME] = { .type = NLA_NUL_STRING,
+ .len = XT_EXTENSION_MAXNAMELEN },
[NFTA_MATCH_REV] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_MATCH_INFO] = { .type = NLA_BINARY },
};
@@ -693,7 +695,12 @@ static int nfnl_compat_get_rcu(struct sk_buff *skb,

name = nla_data(tb[NFTA_COMPAT_NAME]);
rev = ntohl(nla_get_be32(tb[NFTA_COMPAT_REV]));
- target = ntohl(nla_get_be32(tb[NFTA_COMPAT_TYPE]));
+ /* x_tables api checks for 'target == 1' to mean target,
+ * everything else means 'match'.
+ * In x_tables world, the number is set by kernel, not
+ * userspace.
+ */
+ target = nla_get_be32(tb[NFTA_COMPAT_TYPE]) == htonl(1);

switch(family) {
case AF_INET:
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index d4964087bbc5..714a59485935 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -232,13 +232,8 @@ static void nft_connlimit_destroy_clone(const struct nft_ctx *ctx,
static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr)
{
struct nft_connlimit *priv = nft_expr_priv(expr);
- bool ret;

- local_bh_disable();
- ret = nf_conncount_gc_list(net, priv->list);
- local_bh_enable();
-
- return ret;
+ return nf_conncount_gc_list(net, priv->list);
}

static struct nft_expr_type nft_connlimit_type;
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index cc7325329496..0d70325280cc 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -117,8 +117,8 @@ static void nft_counter_reset(struct nft_counter_percpu_priv *priv,
nft_sync = this_cpu_ptr(&nft_counter_sync);

u64_stats_update_begin(nft_sync);
- u64_stats_add(&this_cpu->packets, -total->packets);
- u64_stats_add(&this_cpu->bytes, -total->bytes);
+ u64_stats_sub(&this_cpu->packets, total->packets);
+ u64_stats_sub(&this_cpu->bytes, total->bytes);
u64_stats_update_end(nft_sync);

local_bh_enable();
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index ba01ce75d6de..739b992bde59 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -619,15 +619,20 @@ static struct nft_elem_priv *
nft_hash_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
{
+ const u32 *key = (const u32 *)&elem->key.val;
struct nft_hash *priv = nft_set_priv(set);
u8 genmask = nft_genmask_cur(net);
struct nft_hash_elem *he;
u32 hash;

- hash = jhash(elem->key.val.data, set->klen, priv->seed);
+ if (set->klen == 4)
+ hash = jhash_1word(*key, priv->seed);
+ else
+ hash = jhash(key, set->klen, priv->seed);
+
hash = reciprocal_scale(hash, priv->buckets);
hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
- if (!memcmp(nft_set_ext_key(&he->ext), elem->key.val.data, set->klen) &&
+ if (!memcmp(nft_set_ext_key(&he->ext), key, set->klen) &&
nft_set_elem_active(&he->ext, genmask))
return &he->priv;
}
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index ca594161b840..644d4b916705 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -10,21 +10,41 @@
#include <linux/module.h>
#include <linux/list.h>
#include <linux/rbtree.h>
+#include <linux/bsearch.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>

+struct nft_array_interval {
+ struct nft_set_ext *from;
+ struct nft_set_ext *to;
+};
+
+struct nft_array {
+ u32 max_intervals;
+ u32 num_intervals;
+ struct nft_array_interval *intervals;
+ struct rcu_head rcu_head;
+};
+
struct nft_rbtree {
struct rb_root root;
rwlock_t lock;
- seqcount_rwlock_t count;
+ struct nft_array __rcu *array;
+ struct nft_array *array_next;
+ unsigned long start_rbe_cookie;
unsigned long last_gc;
+ struct list_head expired;
+ u64 last_tstamp;
};

struct nft_rbtree_elem {
struct nft_elem_priv priv;
- struct rb_node node;
+ union {
+ struct rb_node node;
+ struct list_head list;
+ };
struct nft_set_ext ext;
};

@@ -39,6 +59,13 @@ static bool nft_rbtree_interval_start(const struct nft_rbtree_elem *rbe)
return !nft_rbtree_interval_end(rbe);
}

+static bool nft_rbtree_interval_null(const struct nft_set *set,
+ const struct nft_rbtree_elem *rbe)
+{
+ return (!memchr_inv(nft_set_ext_key(&rbe->ext), 0, set->klen) &&
+ nft_rbtree_interval_end(rbe));
+}
+
static int nft_rbtree_cmp(const struct nft_set *set,
const struct nft_rbtree_elem *e1,
const struct nft_rbtree_elem *e2)
@@ -47,67 +74,33 @@ static int nft_rbtree_cmp(const struct nft_set *set,
set->klen);
}

-static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe)
-{
- return nft_set_elem_expired(&rbe->ext);
-}
+struct nft_array_lookup_ctx {
+ const u32 *key;
+ u32 klen;
+};

-static const struct nft_set_ext *
-__nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, unsigned int seq)
+static int nft_array_lookup_cmp(const void *pkey, const void *entry)
{
- struct nft_rbtree *priv = nft_set_priv(set);
- const struct nft_rbtree_elem *rbe, *interval = NULL;
- u8 genmask = nft_genmask_cur(net);
- const struct rb_node *parent;
- int d;
+ const struct nft_array_interval *interval = entry;
+ const struct nft_array_lookup_ctx *ctx = pkey;
+ int a, b;

- parent = rcu_dereference_raw(priv->root.rb_node);
- while (parent != NULL) {
- if (read_seqcount_retry(&priv->count, seq))
- return NULL;
-
- rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+ if (!interval->from)
+ return 1;

- d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen);
- if (d < 0) {
- parent = rcu_dereference_raw(parent->rb_left);
- if (interval &&
- !nft_rbtree_cmp(set, rbe, interval) &&
- nft_rbtree_interval_end(rbe) &&
- nft_rbtree_interval_start(interval))
- continue;
- if (nft_set_elem_active(&rbe->ext, genmask) &&
- !nft_rbtree_elem_expired(rbe))
- interval = rbe;
- } else if (d > 0)
- parent = rcu_dereference_raw(parent->rb_right);
- else {
- if (!nft_set_elem_active(&rbe->ext, genmask)) {
- parent = rcu_dereference_raw(parent->rb_left);
- continue;
- }
-
- if (nft_rbtree_elem_expired(rbe))
- return NULL;
-
- if (nft_rbtree_interval_end(rbe)) {
- if (nft_set_is_anonymous(set))
- return NULL;
- parent = rcu_dereference_raw(parent->rb_left);
- interval = NULL;
- continue;
- }
+ a = memcmp(ctx->key, nft_set_ext_key(interval->from), ctx->klen);
+ if (!interval->to)
+ b = -1;
+ else
+ b = memcmp(ctx->key, nft_set_ext_key(interval->to), ctx->klen);

- return &rbe->ext;
- }
- }
+ if (a >= 0 && b < 0)
+ return 0;

- if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
- nft_rbtree_interval_start(interval))
- return &interval->ext;
+ if (a < 0)
+ return -1;

- return NULL;
+ return 1;
}

INDIRECT_CALLABLE_SCOPE
@@ -116,83 +109,57 @@ nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
const u32 *key)
{
struct nft_rbtree *priv = nft_set_priv(set);
- unsigned int seq = read_seqcount_begin(&priv->count);
- const struct nft_set_ext *ext;
-
- ext = __nft_rbtree_lookup(net, set, key, seq);
- if (ext || !read_seqcount_retry(&priv->count, seq))
- return ext;
-
- read_lock_bh(&priv->lock);
- seq = read_seqcount_begin(&priv->count);
- ext = __nft_rbtree_lookup(net, set, key, seq);
- read_unlock_bh(&priv->lock);
-
- return ext;
+ struct nft_array *array = rcu_dereference(priv->array);
+ const struct nft_array_interval *interval;
+ struct nft_array_lookup_ctx ctx = {
+ .key = key,
+ .klen = set->klen,
+ };
+
+ if (!array)
+ return NULL;
+
+ interval = bsearch(&ctx, array->intervals, array->num_intervals,
+ sizeof(struct nft_array_interval),
+ nft_array_lookup_cmp);
+ if (!interval || nft_set_elem_expired(interval->from))
+ return NULL;
+
+ return interval->from;
}

-static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set,
- const u32 *key, struct nft_rbtree_elem **elem,
- unsigned int seq, unsigned int flags, u8 genmask)
-{
- struct nft_rbtree_elem *rbe, *interval = NULL;
- struct nft_rbtree *priv = nft_set_priv(set);
- const struct rb_node *parent;
- const void *this;
- int d;
-
- parent = rcu_dereference_raw(priv->root.rb_node);
- while (parent != NULL) {
- if (read_seqcount_retry(&priv->count, seq))
- return false;
-
- rbe = rb_entry(parent, struct nft_rbtree_elem, node);
-
- this = nft_set_ext_key(&rbe->ext);
- d = memcmp(this, key, set->klen);
- if (d < 0) {
- parent = rcu_dereference_raw(parent->rb_left);
- if (!(flags & NFT_SET_ELEM_INTERVAL_END))
- interval = rbe;
- } else if (d > 0) {
- parent = rcu_dereference_raw(parent->rb_right);
- if (flags & NFT_SET_ELEM_INTERVAL_END)
- interval = rbe;
- } else {
- if (!nft_set_elem_active(&rbe->ext, genmask)) {
- parent = rcu_dereference_raw(parent->rb_left);
- continue;
- }
+struct nft_array_get_ctx {
+ const u32 *key;
+ unsigned int flags;
+ u32 klen;
+};

- if (nft_set_elem_expired(&rbe->ext))
- return false;
+static int nft_array_get_cmp(const void *pkey, const void *entry)
+{
+ const struct nft_array_interval *interval = entry;
+ const struct nft_array_get_ctx *ctx = pkey;
+ int a, b;

- if (!nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) ||
- (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) ==
- (flags & NFT_SET_ELEM_INTERVAL_END)) {
- *elem = rbe;
- return true;
- }
+ if (!interval->from)
+ return 1;

- if (nft_rbtree_interval_end(rbe))
- interval = NULL;
+ a = memcmp(ctx->key, nft_set_ext_key(interval->from), ctx->klen);
+ if (!interval->to)
+ b = -1;
+ else
+ b = memcmp(ctx->key, nft_set_ext_key(interval->to), ctx->klen);

- parent = rcu_dereference_raw(parent->rb_left);
- }
+ if (a >= 0) {
+ if (ctx->flags & NFT_SET_ELEM_INTERVAL_END && b <= 0)
+ return 0;
+ else if (b < 0)
+ return 0;
}

- if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
- nft_set_elem_active(&interval->ext, genmask) &&
- !nft_set_elem_expired(&interval->ext) &&
- ((!nft_rbtree_interval_end(interval) &&
- !(flags & NFT_SET_ELEM_INTERVAL_END)) ||
- (nft_rbtree_interval_end(interval) &&
- (flags & NFT_SET_ELEM_INTERVAL_END)))) {
- *elem = interval;
- return true;
- }
+ if (a < 0)
+ return -1;

- return false;
+ return 1;
}

static struct nft_elem_priv *
@@ -200,34 +167,41 @@ nft_rbtree_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
{
struct nft_rbtree *priv = nft_set_priv(set);
- unsigned int seq = read_seqcount_begin(&priv->count);
- struct nft_rbtree_elem *rbe = ERR_PTR(-ENOENT);
- const u32 *key = (const u32 *)&elem->key.val;
- u8 genmask = nft_genmask_cur(net);
- bool ret;
-
- ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask);
- if (ret || !read_seqcount_retry(&priv->count, seq))
- return &rbe->priv;
-
- read_lock_bh(&priv->lock);
- seq = read_seqcount_begin(&priv->count);
- ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask);
- read_unlock_bh(&priv->lock);
-
- if (!ret)
+ struct nft_array *array = rcu_dereference(priv->array);
+ const struct nft_array_interval *interval;
+ struct nft_array_get_ctx ctx = {
+ .key = (const u32 *)&elem->key.val,
+ .flags = flags,
+ .klen = set->klen,
+ };
+ struct nft_rbtree_elem *rbe;
+
+ if (!array)
return ERR_PTR(-ENOENT);

+ interval = bsearch(&ctx, array->intervals, array->num_intervals,
+ sizeof(struct nft_array_interval), nft_array_get_cmp);
+ if (!interval || nft_set_elem_expired(interval->from))
+ return ERR_PTR(-ENOENT);
+
+ if (flags & NFT_SET_ELEM_INTERVAL_END)
+ rbe = container_of(interval->to, struct nft_rbtree_elem, ext);
+ else
+ rbe = container_of(interval->from, struct nft_rbtree_elem, ext);
+
return &rbe->priv;
}

-static void nft_rbtree_gc_elem_remove(struct net *net, struct nft_set *set,
- struct nft_rbtree *priv,
- struct nft_rbtree_elem *rbe)
+static void nft_rbtree_gc_elem_move(struct net *net, struct nft_set *set,
+ struct nft_rbtree *priv,
+ struct nft_rbtree_elem *rbe)
{
lockdep_assert_held_write(&priv->lock);
nft_setelem_data_deactivate(net, set, &rbe->priv);
rb_erase(&rbe->node, &priv->root);
+
+ /* collected later on in commit callback */
+ list_add(&rbe->list, &priv->expired);
}

static const struct nft_rbtree_elem *
@@ -238,11 +212,6 @@ nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv,
struct rb_node *prev = rb_prev(&rbe->node);
struct net *net = read_pnet(&set->net);
struct nft_rbtree_elem *rbe_prev;
- struct nft_trans_gc *gc;
-
- gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC);
- if (!gc)
- return ERR_PTR(-ENOMEM);

/* search for end interval coming before this element.
* end intervals don't carry a timeout extension, they
@@ -260,28 +229,10 @@ nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv,
rbe_prev = NULL;
if (prev) {
rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
- nft_rbtree_gc_elem_remove(net, set, priv, rbe_prev);
-
- /* There is always room in this trans gc for this element,
- * memory allocation never actually happens, hence, the warning
- * splat in such case. No need to set NFT_SET_ELEM_DEAD_BIT,
- * this is synchronous gc which never fails.
- */
- gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
- if (WARN_ON_ONCE(!gc))
- return ERR_PTR(-ENOMEM);
-
- nft_trans_gc_elem_add(gc, rbe_prev);
+ nft_rbtree_gc_elem_move(net, set, priv, rbe_prev);
}

- nft_rbtree_gc_elem_remove(net, set, priv, rbe);
- gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
- if (WARN_ON_ONCE(!gc))
- return ERR_PTR(-ENOMEM);
-
- nft_trans_gc_elem_add(gc, rbe);
-
- nft_trans_gc_queue_sync_done(gc);
+ nft_rbtree_gc_elem_move(net, set, priv, rbe);

return rbe_prev;
}
@@ -302,16 +253,107 @@ static bool nft_rbtree_update_first(const struct nft_set *set,
return false;
}

+/* Only for anonymous sets which do not allow updates, all element are active. */
+static struct nft_rbtree_elem *nft_rbtree_prev_active(struct nft_rbtree_elem *rbe)
+{
+ struct rb_node *node;
+
+ node = rb_prev(&rbe->node);
+ if (!node)
+ return NULL;
+
+ return rb_entry(node, struct nft_rbtree_elem, node);
+}
+
+static struct nft_rbtree_elem *
+__nft_rbtree_next_active(struct rb_node *node, u8 genmask)
+{
+ struct nft_rbtree_elem *next_rbe;
+
+ while (node) {
+ next_rbe = rb_entry(node, struct nft_rbtree_elem, node);
+ if (!nft_set_elem_active(&next_rbe->ext, genmask)) {
+ node = rb_next(node);
+ continue;
+ }
+
+ return next_rbe;
+ }
+
+ return NULL;
+}
+
+static struct nft_rbtree_elem *
+nft_rbtree_next_active(struct nft_rbtree_elem *rbe, u8 genmask)
+{
+ return __nft_rbtree_next_active(rb_next(&rbe->node), genmask);
+}
+
+static void nft_rbtree_maybe_reset_start_cookie(struct nft_rbtree *priv,
+ u64 tstamp)
+{
+ if (priv->last_tstamp != tstamp) {
+ priv->start_rbe_cookie = 0;
+ priv->last_tstamp = tstamp;
+ }
+}
+
+static void nft_rbtree_set_start_cookie(struct nft_rbtree *priv,
+ const struct nft_rbtree_elem *rbe)
+{
+ priv->start_rbe_cookie = (unsigned long)rbe;
+}
+
+static void nft_rbtree_set_start_cookie_open(struct nft_rbtree *priv,
+ const struct nft_rbtree_elem *rbe,
+ unsigned long open_interval)
+{
+ priv->start_rbe_cookie = (unsigned long)rbe | open_interval;
+}
+
+#define NFT_RBTREE_OPEN_INTERVAL 1UL
+
+static bool nft_rbtree_cmp_start_cookie(struct nft_rbtree *priv,
+ const struct nft_rbtree_elem *rbe)
+{
+ return (priv->start_rbe_cookie & ~NFT_RBTREE_OPEN_INTERVAL) == (unsigned long)rbe;
+}
+
+static bool nft_rbtree_insert_same_interval(const struct net *net,
+ struct nft_rbtree *priv,
+ struct nft_rbtree_elem *rbe)
+{
+ u8 genmask = nft_genmask_next(net);
+ struct nft_rbtree_elem *next_rbe;
+
+ if (!priv->start_rbe_cookie)
+ return true;
+
+ next_rbe = nft_rbtree_next_active(rbe, genmask);
+ if (next_rbe) {
+ /* Closest start element differs from last element added. */
+ if (nft_rbtree_interval_start(next_rbe) &&
+ nft_rbtree_cmp_start_cookie(priv, next_rbe)) {
+ priv->start_rbe_cookie = 0;
+ return true;
+ }
+ }
+
+ priv->start_rbe_cookie = 0;
+
+ return false;
+}
+
static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree_elem *new,
- struct nft_elem_priv **elem_priv)
+ struct nft_elem_priv **elem_priv, u64 tstamp, bool last)
{
- struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL;
+ struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL, *rbe_prev;
struct rb_node *node, *next, *parent, **p, *first = NULL;
struct nft_rbtree *priv = nft_set_priv(set);
u8 cur_genmask = nft_genmask_cur(net);
u8 genmask = nft_genmask_next(net);
- u64 tstamp = nft_net_tstamp(net);
+ unsigned long open_interval = 0;
int d;

/* Descend the tree to search for an existing element greater than the
@@ -417,12 +459,46 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
}
}

+ if (nft_rbtree_interval_null(set, new)) {
+ priv->start_rbe_cookie = 0;
+ } else if (nft_rbtree_interval_start(new) && priv->start_rbe_cookie) {
+ if (nft_set_is_anonymous(set)) {
+ priv->start_rbe_cookie = 0;
+ } else if (priv->start_rbe_cookie & NFT_RBTREE_OPEN_INTERVAL) {
+ /* Previous element is an open interval that partially
+ * overlaps with an existing non-open interval.
+ */
+ return -ENOTEMPTY;
+ }
+ }
+
/* - new start element matching existing start element: full overlap
* reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given.
*/
if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) &&
nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) {
*elem_priv = &rbe_ge->priv;
+
+ /* - Corner case: new start element of open interval (which
+ * comes as last element in the batch) overlaps the start of
+ * an existing interval with an end element: partial overlap.
+ */
+ node = rb_first(&priv->root);
+ rbe = __nft_rbtree_next_active(node, genmask);
+ if (rbe && nft_rbtree_interval_end(rbe)) {
+ rbe = nft_rbtree_next_active(rbe, genmask);
+ if (rbe &&
+ nft_rbtree_interval_start(rbe) &&
+ !nft_rbtree_cmp(set, new, rbe)) {
+ if (last)
+ return -ENOTEMPTY;
+
+ /* Maybe open interval? */
+ open_interval = NFT_RBTREE_OPEN_INTERVAL;
+ }
+ }
+ nft_rbtree_set_start_cookie_open(priv, rbe_ge, open_interval);
+
return -EEXIST;
}

@@ -431,18 +507,37 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
*/
if (rbe_le && !nft_rbtree_cmp(set, new, rbe_le) &&
nft_rbtree_interval_end(rbe_le) == nft_rbtree_interval_end(new)) {
+ /* - ignore null interval, otherwise NLM_F_CREATE bogusly
+ * reports EEXIST.
+ */
+ if (nft_rbtree_interval_null(set, new))
+ return -ECANCELED;
+
*elem_priv = &rbe_le->priv;
+
+ /* - start and end element belong to the same interval. */
+ if (!nft_rbtree_insert_same_interval(net, priv, rbe_le))
+ return -ENOTEMPTY;
+
return -EEXIST;
}

/* - new start element with existing closest, less or equal key value
* being a start element: partial overlap, reported as -ENOTEMPTY.
* Anonymous sets allow for two consecutive start element since they
- * are constant, skip them to avoid bogus overlap reports.
+ * are constant, but validate that this new start element does not
+ * sit in between an existing start and end elements: partial overlap,
+ * reported as -ENOTEMPTY.
*/
- if (!nft_set_is_anonymous(set) && rbe_le &&
- nft_rbtree_interval_start(rbe_le) && nft_rbtree_interval_start(new))
- return -ENOTEMPTY;
+ if (rbe_le &&
+ nft_rbtree_interval_start(rbe_le) && nft_rbtree_interval_start(new)) {
+ if (!nft_set_is_anonymous(set))
+ return -ENOTEMPTY;
+
+ rbe_prev = nft_rbtree_prev_active(rbe_le);
+ if (rbe_prev && nft_rbtree_interval_end(rbe_prev))
+ return -ENOTEMPTY;
+ }

/* - new end element with existing closest, less or equal key value
* being a end element: partial overlap, reported as -ENOTEMPTY.
@@ -458,6 +553,12 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
nft_rbtree_interval_end(rbe_ge) && nft_rbtree_interval_end(new))
return -ENOTEMPTY;

+ /* - start element overlaps an open interval but end element is new:
+ * partial overlap, reported as -ENOTEMPTY.
+ */
+ if (!rbe_ge && priv->start_rbe_cookie && nft_rbtree_interval_end(new))
+ return -ENOTEMPTY;
+
/* Accepted element: pick insertion point depending on key value */
parent = NULL;
p = &priv->root.rb_node;
@@ -481,14 +582,102 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
return 0;
}

+static int nft_array_intervals_alloc(struct nft_array *array, u32 max_intervals)
+{
+ struct nft_array_interval *intervals;
+
+ intervals = kvcalloc(max_intervals, sizeof(struct nft_array_interval),
+ GFP_KERNEL_ACCOUNT);
+ if (!intervals)
+ return -ENOMEM;
+
+ if (array->intervals)
+ kvfree(array->intervals);
+
+ array->intervals = intervals;
+ array->max_intervals = max_intervals;
+
+ return 0;
+}
+
+static struct nft_array *nft_array_alloc(u32 max_intervals)
+{
+ struct nft_array *array;
+
+ array = kzalloc(sizeof(*array), GFP_KERNEL_ACCOUNT);
+ if (!array)
+ return NULL;
+
+ if (nft_array_intervals_alloc(array, max_intervals) < 0) {
+ kfree(array);
+ return NULL;
+ }
+
+ return array;
+}
+
+#define NFT_ARRAY_EXTRA_SIZE 10240
+
+/* Similar to nft_rbtree_{u,k}size to hide details to userspace, but consider
+ * packed representation coming from userspace for anonymous sets too.
+ */
+static u32 nft_array_elems(const struct nft_set *set)
+{
+ u32 nelems = atomic_read(&set->nelems);
+
+ /* Adjacent intervals are represented with a single start element in
+ * anonymous sets, use the current element counter as is.
+ */
+ if (nft_set_is_anonymous(set))
+ return nelems;
+
+ /* Add extra room for never matching interval at the beginning and open
+ * interval at the end which only use a single element to represent it.
+ * The conversion to array will compact intervals, this allows reduce
+ * memory consumption.
+ */
+ return (nelems / 2) + 2;
+}
+
+static int nft_array_may_resize(const struct nft_set *set)
+{
+ u32 nelems = nft_array_elems(set), new_max_intervals;
+ struct nft_rbtree *priv = nft_set_priv(set);
+ struct nft_array *array;
+
+ if (!priv->array_next) {
+ array = nft_array_alloc(nelems + NFT_ARRAY_EXTRA_SIZE);
+ if (!array)
+ return -ENOMEM;
+
+ priv->array_next = array;
+ }
+
+ if (nelems < priv->array_next->max_intervals)
+ return 0;
+
+ new_max_intervals = priv->array_next->max_intervals + NFT_ARRAY_EXTRA_SIZE;
+ if (nft_array_intervals_alloc(priv->array_next, new_max_intervals) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem,
struct nft_elem_priv **elem_priv)
{
struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem->priv);
+ bool last = !!(elem->flags & NFT_SET_ELEM_INTERNAL_LAST);
struct nft_rbtree *priv = nft_set_priv(set);
+ u64 tstamp = nft_net_tstamp(net);
int err;

+ nft_rbtree_maybe_reset_start_cookie(priv, tstamp);
+
+ if (nft_array_may_resize(set) < 0)
+ return -ENOMEM;
+
do {
if (fatal_signal_pending(current))
return -EINTR;
@@ -496,10 +685,12 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
cond_resched();

write_lock_bh(&priv->lock);
- write_seqcount_begin(&priv->count);
- err = __nft_rbtree_insert(net, set, rbe, elem_priv);
- write_seqcount_end(&priv->count);
+ err = __nft_rbtree_insert(net, set, rbe, elem_priv, tstamp, last);
write_unlock_bh(&priv->lock);
+
+ if (nft_rbtree_interval_end(rbe))
+ priv->start_rbe_cookie = 0;
+
} while (err == -EAGAIN);

return err;
@@ -508,9 +699,7 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
static void nft_rbtree_erase(struct nft_rbtree *priv, struct nft_rbtree_elem *rbe)
{
write_lock_bh(&priv->lock);
- write_seqcount_begin(&priv->count);
rb_erase(&rbe->node, &priv->root);
- write_seqcount_end(&priv->count);
write_unlock_bh(&priv->lock);
}

@@ -533,6 +722,48 @@ static void nft_rbtree_activate(const struct net *net,
nft_clear(net, &rbe->ext);
}

+static struct nft_rbtree_elem *
+nft_rbtree_next_inactive(struct nft_rbtree_elem *rbe, u8 genmask)
+{
+ struct nft_rbtree_elem *next_rbe;
+ struct rb_node *node;
+
+ node = rb_next(&rbe->node);
+ if (node) {
+ next_rbe = rb_entry(node, struct nft_rbtree_elem, node);
+ if (nft_rbtree_interval_start(next_rbe) &&
+ !nft_set_elem_active(&next_rbe->ext, genmask))
+ return next_rbe;
+ }
+
+ return NULL;
+}
+
+static bool nft_rbtree_deactivate_same_interval(const struct net *net,
+ struct nft_rbtree *priv,
+ struct nft_rbtree_elem *rbe)
+{
+ u8 genmask = nft_genmask_next(net);
+ struct nft_rbtree_elem *next_rbe;
+
+ if (!priv->start_rbe_cookie)
+ return true;
+
+ next_rbe = nft_rbtree_next_inactive(rbe, genmask);
+ if (next_rbe) {
+ /* Closest start element differs from last element added. */
+ if (nft_rbtree_interval_start(next_rbe) &&
+ nft_rbtree_cmp_start_cookie(priv, next_rbe)) {
+ priv->start_rbe_cookie = 0;
+ return true;
+ }
+ }
+
+ priv->start_rbe_cookie = 0;
+
+ return false;
+}
+
static void nft_rbtree_flush(const struct net *net,
const struct nft_set *set,
struct nft_elem_priv *elem_priv)
@@ -547,12 +778,22 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_rbtree_elem *rbe, *this = nft_elem_priv_cast(elem->priv);
- const struct nft_rbtree *priv = nft_set_priv(set);
+ bool last = !!(elem->flags & NFT_SET_ELEM_INTERNAL_LAST);
+ struct nft_rbtree *priv = nft_set_priv(set);
const struct rb_node *parent = priv->root.rb_node;
u8 genmask = nft_genmask_next(net);
u64 tstamp = nft_net_tstamp(net);
int d;

+ nft_rbtree_maybe_reset_start_cookie(priv, tstamp);
+
+ if (nft_rbtree_interval_start(this) ||
+ nft_rbtree_interval_null(set, this))
+ priv->start_rbe_cookie = 0;
+
+ if (nft_array_may_resize(set) < 0)
+ return NULL;
+
while (parent != NULL) {
rbe = rb_entry(parent, struct nft_rbtree_elem, node);

@@ -577,6 +818,13 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
parent = parent->rb_left;
continue;
}
+
+ if (nft_rbtree_interval_start(rbe)) {
+ if (!last)
+ nft_rbtree_set_start_cookie(priv, rbe);
+ } else if (!nft_rbtree_deactivate_same_interval(net, priv, rbe))
+ return NULL;
+
nft_rbtree_flush(net, set, &rbe->priv);
return &rbe->priv;
}
@@ -615,6 +863,11 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
switch (iter->type) {
case NFT_ITER_UPDATE:
lockdep_assert_held(&nft_pernet(ctx->net)->commit_mutex);
+
+ if (nft_array_may_resize(set) < 0) {
+ iter->err = -ENOMEM;
+ break;
+ }
nft_rbtree_do_walk(ctx, set, iter);
break;
case NFT_ITER_READ:
@@ -629,29 +882,13 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
}
}

-static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
- struct nft_rbtree *priv,
- struct nft_rbtree_elem *rbe)
-{
- nft_setelem_data_deactivate(net, set, &rbe->priv);
- nft_rbtree_erase(priv, rbe);
-}
-
-static void nft_rbtree_gc(struct nft_set *set)
+static void nft_rbtree_gc_scan(struct nft_set *set)
{
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe, *rbe_end = NULL;
struct net *net = read_pnet(&set->net);
u64 tstamp = nft_net_tstamp(net);
struct rb_node *node, *next;
- struct nft_trans_gc *gc;
-
- set = nft_set_container_of(priv);
- net = read_pnet(&set->net);
-
- gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
- if (!gc)
- return;

for (node = rb_first(&priv->root); node ; node = next) {
next = rb_next(node);
@@ -669,34 +906,46 @@ static void nft_rbtree_gc(struct nft_set *set)
if (!__nft_set_elem_expired(&rbe->ext, tstamp))
continue;

- gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
- if (!gc)
- goto try_later;
-
/* end element needs to be removed first, it has
* no timeout extension.
*/
+ write_lock_bh(&priv->lock);
if (rbe_end) {
- nft_rbtree_gc_remove(net, set, priv, rbe_end);
- nft_trans_gc_elem_add(gc, rbe_end);
+ nft_rbtree_gc_elem_move(net, set, priv, rbe_end);
rbe_end = NULL;
}

- gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
- if (!gc)
- goto try_later;
-
- nft_rbtree_gc_remove(net, set, priv, rbe);
- nft_trans_gc_elem_add(gc, rbe);
+ nft_rbtree_gc_elem_move(net, set, priv, rbe);
+ write_unlock_bh(&priv->lock);
}

-try_later:
+ priv->last_gc = jiffies;
+}
+
+static void nft_rbtree_gc_queue(struct nft_set *set)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+ struct nft_rbtree_elem *rbe, *rbe_end;
+ struct nft_trans_gc *gc;

- if (gc) {
- gc = nft_trans_gc_catchall_sync(gc);
- nft_trans_gc_queue_sync_done(gc);
- priv->last_gc = jiffies;
+ if (list_empty(&priv->expired))
+ return;
+
+ gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
+ if (!gc)
+ return;
+
+ list_for_each_entry_safe(rbe, rbe_end, &priv->expired, list) {
+ list_del(&rbe->list);
+ nft_trans_gc_elem_add(gc, rbe);
+
+ gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
+ if (!gc)
+ return;
}
+
+ gc = nft_trans_gc_catchall_sync(gc);
+ nft_trans_gc_queue_sync_done(gc);
}

static u64 nft_rbtree_privsize(const struct nlattr * const nla[],
@@ -714,24 +963,45 @@ static int nft_rbtree_init(const struct nft_set *set,
BUILD_BUG_ON(offsetof(struct nft_rbtree_elem, priv) != 0);

rwlock_init(&priv->lock);
- seqcount_rwlock_init(&priv->count, &priv->lock);
priv->root = RB_ROOT;
+ INIT_LIST_HEAD(&priv->expired);
+
+ priv->array = NULL;
+ priv->array_next = NULL;

return 0;
}

+static void __nft_array_free(struct nft_array *array)
+{
+ kvfree(array->intervals);
+ kfree(array);
+}
+
static void nft_rbtree_destroy(const struct nft_ctx *ctx,
const struct nft_set *set)
{
struct nft_rbtree *priv = nft_set_priv(set);
- struct nft_rbtree_elem *rbe;
+ struct nft_rbtree_elem *rbe, *next;
+ struct nft_array *array;
struct rb_node *node;

+ list_for_each_entry_safe(rbe, next, &priv->expired, list) {
+ list_del(&rbe->list);
+ nf_tables_set_elem_destroy(ctx, set, &rbe->priv);
+ }
+
while ((node = priv->root.rb_node) != NULL) {
rb_erase(node, &priv->root);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
nf_tables_set_elem_destroy(ctx, set, &rbe->priv);
}
+
+ array = rcu_dereference_protected(priv->array, true);
+ if (array)
+ __nft_array_free(array);
+ if (priv->array_next)
+ __nft_array_free(priv->array_next);
}

static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
@@ -752,12 +1022,105 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
return true;
}

+static void nft_array_free_rcu(struct rcu_head *rcu_head)
+{
+ struct nft_array *array = container_of(rcu_head, struct nft_array, rcu_head);
+
+ __nft_array_free(array);
+}
+
static void nft_rbtree_commit(struct nft_set *set)
{
struct nft_rbtree *priv = nft_set_priv(set);
+ struct nft_rbtree_elem *rbe, *prev_rbe;
+ struct nft_array *old;
+ u32 num_intervals = 0;
+ struct rb_node *node;

+ /* No changes, skip, eg. elements updates only. */
+ if (!priv->array_next)
+ return;
+
+ /* GC can be performed if the binary search blob is going
+ * to be rebuilt. It has to be done in two phases: first
+ * scan tree and move all expired elements to the expired
+ * list.
+ *
+ * Then, after blob has been re-built and published to other
+ * CPUs, queue collected entries for freeing.
+ */
if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
- nft_rbtree_gc(set);
+ nft_rbtree_gc_scan(set);
+
+ /* Reverse walk to create an array from smaller to largest interval. */
+ node = rb_last(&priv->root);
+ if (node)
+ prev_rbe = rb_entry(node, struct nft_rbtree_elem, node);
+ else
+ prev_rbe = NULL;
+
+ while (prev_rbe) {
+ rbe = prev_rbe;
+
+ if (nft_rbtree_interval_start(rbe))
+ priv->array_next->intervals[num_intervals].from = &rbe->ext;
+ else if (nft_rbtree_interval_end(rbe))
+ priv->array_next->intervals[num_intervals++].to = &rbe->ext;
+
+ if (num_intervals >= priv->array_next->max_intervals) {
+ pr_warn_once("malformed interval set from userspace?");
+ goto err_out;
+ }
+
+ node = rb_prev(node);
+ if (!node)
+ break;
+
+ prev_rbe = rb_entry(node, struct nft_rbtree_elem, node);
+
+ /* For anonymous sets, when adjacent ranges are found,
+ * the end element is not added to the set to pack the set
+ * representation. Use next start element to complete this
+ * interval.
+ */
+ if (nft_rbtree_interval_start(rbe) &&
+ nft_rbtree_interval_start(prev_rbe) &&
+ priv->array_next->intervals[num_intervals].from)
+ priv->array_next->intervals[num_intervals++].to = &prev_rbe->ext;
+
+ if (num_intervals >= priv->array_next->max_intervals) {
+ pr_warn_once("malformed interval set from userspace?");
+ goto err_out;
+ }
+ }
+
+ if (priv->array_next->intervals[num_intervals].from)
+ num_intervals++;
+err_out:
+ priv->array_next->num_intervals = num_intervals;
+ old = rcu_replace_pointer(priv->array, priv->array_next,
+ lockdep_is_held(&nft_pernet(read_pnet(&set->net))->commit_mutex));
+ priv->array_next = NULL;
+ if (old)
+ call_rcu(&old->rcu_head, nft_array_free_rcu);
+
+ /* New blob is public, queue collected entries for freeing.
+ * call_rcu ensures elements stay around until readers are done.
+ */
+ nft_rbtree_gc_queue(set);
+}
+
+static void nft_rbtree_abort(const struct nft_set *set)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+ struct nft_array *array_next;
+
+ if (!priv->array_next)
+ return;
+
+ array_next = priv->array_next;
+ priv->array_next = NULL;
+ __nft_array_free(array_next);
}

static void nft_rbtree_gc_init(const struct nft_set *set)
@@ -821,6 +1184,7 @@ const struct nft_set_type nft_set_rbtree_type = {
.flush = nft_rbtree_flush,
.activate = nft_rbtree_activate,
.commit = nft_rbtree_commit,
+ .abort = nft_rbtree_abort,
.gc_init = nft_rbtree_gc_init,
.lookup = nft_rbtree_lookup,
.walk = nft_rbtree_walk,
diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c
index 4fc37894860c..08c8aa1530d8 100644
--- a/net/nfc/hci/llc_shdlc.c
+++ b/net/nfc/hci/llc_shdlc.c
@@ -762,6 +762,14 @@ static void llc_shdlc_deinit(struct nfc_llc *llc)
{
struct llc_shdlc *shdlc = nfc_llc_get_data(llc);

+ timer_shutdown_sync(&shdlc->connect_timer);
+ timer_shutdown_sync(&shdlc->t1_timer);
+ timer_shutdown_sync(&shdlc->t2_timer);
+ shdlc->t1_active = false;
+ shdlc->t2_active = false;
+
+ cancel_work_sync(&shdlc->sm_work);
+
skb_queue_purge(&shdlc->rcv_q);
skb_queue_purge(&shdlc->send_q);
skb_queue_purge(&shdlc->ack_pending_q);
diff --git a/net/psp/Kconfig b/net/psp/Kconfig
index 371e8771f3bd..84d6b0f25460 100644
--- a/net/psp/Kconfig
+++ b/net/psp/Kconfig
@@ -6,6 +6,7 @@ config INET_PSP
bool "PSP Security Protocol support"
depends on INET
select SKB_DECRYPTED
+ select SKB_EXTENSIONS
select SOCK_VALIDATE_XMIT
help
Enable kernel support for the PSP Security Protocol (PSP).
diff --git a/net/rds/send.c b/net/rds/send.c
index 0b3d0ef2f008..071c5dca969a 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1382,9 +1382,11 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
else
queue_delayed_work(rds_wq, &cpath->cp_send_w, 1);
rcu_read_unlock();
+
+ if (ret)
+ goto out;
}
- if (ret)
- goto out;
+
rds_message_put(rm);

for (ind = 0; ind < vct.indx; ind++)
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 5c095cb8cb20..bb3c3db2713b 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -39,6 +39,8 @@ static const struct rpc_authops authgss_ops;
static const struct rpc_credops gss_credops;
static const struct rpc_credops gss_nullops;

+static void gss_free_callback(struct kref *kref);
+
#define GSS_RETRY_EXPIRED 5
static unsigned int gss_expired_cred_retry_delay = GSS_RETRY_EXPIRED;

@@ -551,6 +553,7 @@ gss_alloc_msg(struct gss_auth *gss_auth,
}
return gss_msg;
err_put_pipe_version:
+ kref_put(&gss_auth->kref, gss_free_callback);
put_pipe_version(gss_auth->net);
err_free_msg:
kfree(gss_msg);
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index 7d2cdc2bd374..f320c0a8e604 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -320,29 +320,47 @@ static int gssx_dec_status(struct xdr_stream *xdr,

/* status->minor_status */
p = xdr_inline_decode(xdr, 8);
- if (unlikely(p == NULL))
- return -ENOSPC;
+ if (unlikely(p == NULL)) {
+ err = -ENOSPC;
+ goto out_free_mech;
+ }
p = xdr_decode_hyper(p, &status->minor_status);

/* status->major_status_string */
err = gssx_dec_buffer(xdr, &status->major_status_string);
if (err)
- return err;
+ goto out_free_mech;

/* status->minor_status_string */
err = gssx_dec_buffer(xdr, &status->minor_status_string);
if (err)
- return err;
+ goto out_free_major_status_string;

/* status->server_ctx */
err = gssx_dec_buffer(xdr, &status->server_ctx);
if (err)
- return err;
+ goto out_free_minor_status_string;

/* we assume we have no options for now, so simply consume them */
/* status->options */
err = dummy_dec_opt_array(xdr, &status->options);
+ if (err)
+ goto out_free_server_ctx;

+ return 0;
+
+out_free_server_ctx:
+ kfree(status->server_ctx.data);
+ status->server_ctx.data = NULL;
+out_free_minor_status_string:
+ kfree(status->minor_status_string.data);
+ status->minor_status_string.data = NULL;
+out_free_major_status_string:
+ kfree(status->major_status_string.data);
+ status->major_status_string.data = NULL;
+out_free_mech:
+ kfree(status->mech.data);
+ status->mech.data = NULL;
return err;
}

@@ -505,28 +523,35 @@ static int gssx_dec_name(struct xdr_stream *xdr,
/* name->name_type */
err = gssx_dec_buffer(xdr, &dummy_netobj);
if (err)
- return err;
+ goto out_free_display_name;

/* name->exported_name */
err = gssx_dec_buffer(xdr, &dummy_netobj);
if (err)
- return err;
+ goto out_free_display_name;

/* name->exported_composite_name */
err = gssx_dec_buffer(xdr, &dummy_netobj);
if (err)
- return err;
+ goto out_free_display_name;

/* we assume we have no attributes for now, so simply consume them */
/* name->name_attributes */
err = dummy_dec_nameattr_array(xdr, &dummy_name_attr_array);
if (err)
- return err;
+ goto out_free_display_name;

/* we assume we have no options for now, so simply consume them */
/* name->extensions */
err = dummy_dec_opt_array(xdr, &dummy_option_array);
+ if (err)
+ goto out_free_display_name;

+ return 0;
+
+out_free_display_name:
+ kfree(name->display_name.data);
+ name->display_name.data = NULL;
return err;
}

@@ -649,32 +674,34 @@ static int gssx_dec_ctx(struct xdr_stream *xdr,
/* ctx->state */
err = gssx_dec_buffer(xdr, &ctx->state);
if (err)
- return err;
+ goto out_free_exported_context_token;

/* ctx->need_release */
err = gssx_dec_bool(xdr, &ctx->need_release);
if (err)
- return err;
+ goto out_free_state;

/* ctx->mech */
err = gssx_dec_buffer(xdr, &ctx->mech);
if (err)
- return err;
+ goto out_free_state;

/* ctx->src_name */
err = gssx_dec_name(xdr, &ctx->src_name);
if (err)
- return err;
+ goto out_free_mech;

/* ctx->targ_name */
err = gssx_dec_name(xdr, &ctx->targ_name);
if (err)
- return err;
+ goto out_free_src_name;

/* ctx->lifetime */
p = xdr_inline_decode(xdr, 8+8);
- if (unlikely(p == NULL))
- return -ENOSPC;
+ if (unlikely(p == NULL)) {
+ err = -ENOSPC;
+ goto out_free_targ_name;
+ }
p = xdr_decode_hyper(p, &ctx->lifetime);

/* ctx->ctx_flags */
@@ -683,17 +710,36 @@ static int gssx_dec_ctx(struct xdr_stream *xdr,
/* ctx->locally_initiated */
err = gssx_dec_bool(xdr, &ctx->locally_initiated);
if (err)
- return err;
+ goto out_free_targ_name;

/* ctx->open */
err = gssx_dec_bool(xdr, &ctx->open);
if (err)
- return err;
+ goto out_free_targ_name;

/* we assume we have no options for now, so simply consume them */
/* ctx->options */
err = dummy_dec_opt_array(xdr, &ctx->options);
+ if (err)
+ goto out_free_targ_name;
+
+ return 0;

+out_free_targ_name:
+ kfree(ctx->targ_name.display_name.data);
+ ctx->targ_name.display_name.data = NULL;
+out_free_src_name:
+ kfree(ctx->src_name.display_name.data);
+ ctx->src_name.display_name.data = NULL;
+out_free_mech:
+ kfree(ctx->mech.data);
+ ctx->mech.data = NULL;
+out_free_state:
+ kfree(ctx->state.data);
+ ctx->state.data = NULL;
+out_free_exported_context_token:
+ kfree(ctx->exported_context_token.data);
+ ctx->exported_context_token.data = NULL;
return err;
}

diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 3d7f1413df02..12857381e861 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -462,7 +462,10 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
newxprt->sc_max_bc_requests = 2;
}

- /* Arbitrary estimate of the needed number of rdma_rw contexts.
+ /* Estimate the needed number of rdma_rw contexts. The maximum
+ * Read and Write chunks have one segment each. Each request
+ * can involve one Read chunk and either a Write chunk or Reply
+ * chunk; thus a factor of three.
*/
maxpayload = min(xprt->xpt_server->sv_max_payload,
RPCSVC_MAXPAYLOAD_RDMA);
@@ -470,7 +473,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
rdma_rw_mr_factor(dev, newxprt->sc_port_num,
maxpayload >> PAGE_SHIFT);

- newxprt->sc_sq_depth = rq_depth + ctxts;
+ newxprt->sc_sq_depth = rq_depth +
+ rdma_rw_max_send_wr(dev, newxprt->sc_port_num, ctxts, 0);
if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr)
newxprt->sc_sq_depth = dev->attrs.max_qp_wr;
atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index c634a7fc8609..9dad3af700af 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1671,10 +1671,9 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,

timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

- /* First of all allocate resources.
- * If we will make it after state is locked,
- * we will have to recheck all again in any case.
- */
+ err = prepare_peercred(&peercred);
+ if (err)
+ goto out;

/* create new sock for complete connection */
newsk = unix_create1(net, NULL, 0, sock->type);
@@ -1683,10 +1682,6 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
goto out;
}

- err = prepare_peercred(&peercred);
- if (err)
- goto out;
-
/* Allocate skb for sending to listening sock */
skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
if (!skb) {
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 5e5c1bc380a8..87f083d9247a 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1400,8 +1400,10 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev,
cfg80211_leave_ocb(rdev, dev);
break;
case NL80211_IFTYPE_P2P_DEVICE:
+ cfg80211_stop_p2p_device(rdev, wdev);
+ break;
case NL80211_IFTYPE_NAN:
- /* cannot happen, has no netdev */
+ cfg80211_stop_nan(rdev, wdev);
break;
case NL80211_IFTYPE_AP_VLAN:
case NL80211_IFTYPE_MONITOR:
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 90a9187a6b13..9a0c02c23dc5 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -1959,7 +1959,7 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev,
ether_addr_copy(known->parent_bssid, new->parent_bssid);
known->pub.max_bssid_indicator = new->pub.max_bssid_indicator;
known->pub.bssid_index = new->pub.bssid_index;
- known->pub.use_for &= new->pub.use_for;
+ known->pub.use_for = new->pub.use_for;
known->pub.cannot_use_reasons = new->pub.cannot_use_reasons;
known->bss_source = new->bss_source;

diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs
index 49fad6de0674..cc907fb531bc 100644
--- a/rust/kernel/task.rs
+++ b/rust/kernel/task.rs
@@ -204,18 +204,6 @@ pub fn as_ptr(&self) -> *mut bindings::task_struct {
self.0.get()
}

- /// Returns the group leader of the given task.
- pub fn group_leader(&self) -> &Task {
- // SAFETY: The group leader of a task never changes after initialization, so reading this
- // field is not a data race.
- let ptr = unsafe { *ptr::addr_of!((*self.as_ptr()).group_leader) };
-
- // SAFETY: The lifetime of the returned task reference is tied to the lifetime of `self`,
- // and given that a task has a reference to its group leader, we know it must be valid for
- // the lifetime of the returned task reference.
- unsafe { &*ptr.cast() }
- }
-
/// Returns the PID of the given task.
pub fn pid(&self) -> Pid {
// SAFETY: The pid of a task never changes after initialization, so reading this field is
@@ -345,6 +333,18 @@ pub fn active_pid_ns(&self) -> Option<&PidNamespace> {
// `release_task()` call.
Some(unsafe { PidNamespace::from_ptr(active_ns) })
}
+
+ /// Returns the group leader of the current task.
+ pub fn group_leader(&self) -> &Task {
+ // SAFETY: The group leader of a task never changes while the task is running, and `self`
+ // is the current task, which is guaranteed running.
+ let ptr = unsafe { (*self.as_ptr()).group_leader };
+
+ // SAFETY: `current->group_leader` stays valid for at least the duration in which `current`
+ // is running, and the signature of this function ensures that the returned `&Task` can
+ // only be used while `current` is still valid, thus still running.
+ unsafe { &*ptr.cast() }
+ }
}

// SAFETY: The type invariants guarantee that `Task` is always refcounted.
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 391a586d0557..7803b973b4c4 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -1639,6 +1639,15 @@ static const char *rawdata_get_link_base(struct dentry *dentry,

label = aa_get_label_rcu(&proxy->label);
profile = labels_profile(label);
+
+ /* rawdata can be null when aa_g_export_binary is unset during
+ * runtime and a profile is replaced
+ */
+ if (!profile->rawdata) {
+ aa_put_label(label);
+ return ERR_PTR(-ENOENT);
+ }
+
depth = profile_depth(profile);
target = gen_symlink_name(depth, profile->rawdata->name, name);
aa_put_label(label);
diff --git a/security/apparmor/file.c b/security/apparmor/file.c
index c75820402878..7de23e85cd5d 100644
--- a/security/apparmor/file.c
+++ b/security/apparmor/file.c
@@ -154,8 +154,12 @@ static int path_name(const char *op, const struct cred *subj_cred,
const char *info = NULL;
int error;

- error = aa_path_name(path, flags, buffer, name, &info,
- labels_profile(label)->disconnected);
+ /* don't reaudit files closed during inheritance */
+ if (unlikely(path->dentry == aa_null.dentry))
+ error = -EACCES;
+ else
+ error = aa_path_name(path, flags, buffer, name, &info,
+ labels_profile(label)->disconnected);
if (error) {
fn_for_each_confined(label, profile,
aa_audit_file(subj_cred,
@@ -578,6 +582,9 @@ static bool __unix_needs_revalidation(struct file *file, struct aa_label *label,
return false;
if (request & NET_PEER_MASK)
return false;
+ /* sock and sock->sk can be NULL for sockets being set up or torn down */
+ if (!sock || !sock->sk)
+ return false;
if (sock->sk->sk_family == PF_UNIX) {
struct aa_sk_ctx *ctx = aa_sock(sock->sk);

@@ -613,6 +620,10 @@ int aa_file_perm(const char *op, const struct cred *subj_cred,
AA_BUG(!label);
AA_BUG(!file);

+ /* don't reaudit files closed during inheritance */
+ if (unlikely(file->f_path.dentry == aa_null.dentry))
+ return -EACCES;
+
fctx = file_ctx(file);

rcu_read_lock();
diff --git a/security/apparmor/include/match.h b/security/apparmor/include/match.h
index 1fbe82f5021b..0dde8eda3d1a 100644
--- a/security/apparmor/include/match.h
+++ b/security/apparmor/include/match.h
@@ -104,16 +104,18 @@ struct aa_dfa {
struct table_header *tables[YYTD_ID_TSIZE];
};

-#define byte_to_byte(X) (X)
-
#define UNPACK_ARRAY(TABLE, BLOB, LEN, TTYPE, BTYPE, NTOHX) \
do { \
typeof(LEN) __i; \
TTYPE *__t = (TTYPE *) TABLE; \
BTYPE *__b = (BTYPE *) BLOB; \
- for (__i = 0; __i < LEN; __i++) { \
- __t[__i] = NTOHX(__b[__i]); \
- } \
+ BUILD_BUG_ON(sizeof(TTYPE) != sizeof(BTYPE)); \
+ if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) \
+ memcpy(__t, __b, (LEN) * sizeof(BTYPE)); \
+ else /* copy & convert from big-endian */ \
+ for (__i = 0; __i < LEN; __i++) { \
+ __t[__i] = NTOHX(&__b[__i]); \
+ } \
} while (0)

static inline size_t table_size(size_t len, size_t el_size)
diff --git a/security/apparmor/label.c b/security/apparmor/label.c
index 913678f199c3..dd6c58f595ba 100644
--- a/security/apparmor/label.c
+++ b/security/apparmor/label.c
@@ -1278,7 +1278,7 @@ static inline aa_state_t match_component(struct aa_profile *profile,
* @request: permissions to request
* @perms: perms struct to set
*
- * Returns: 0 on success else ERROR
+ * Returns: state match stopped at or DFA_NOMATCH if aborted early
*
* For the label A//&B//&C this does the perm match for A//&B//&C
* @perms should be preinitialized with allperms OR a previous permission
@@ -1305,7 +1305,7 @@ static int label_compound_match(struct aa_profile *profile,

/* no component visible */
*perms = allperms;
- return 0;
+ return state;

next:
label_for_each_cont(i, label, tp) {
@@ -1317,15 +1317,11 @@ static int label_compound_match(struct aa_profile *profile,
goto fail;
}
*perms = *aa_lookup_perms(rules->policy, state);
- aa_apply_modes_to_perms(profile, perms);
- if ((perms->allow & request) != request)
- return -EACCES;
-
- return 0;
+ return state;

fail:
*perms = nullperms;
- return state;
+ return DFA_NOMATCH;
}

/**
@@ -1338,7 +1334,7 @@ static int label_compound_match(struct aa_profile *profile,
* @request: permissions to request
* @perms: an initialized perms struct to add accumulation to
*
- * Returns: 0 on success else ERROR
+ * Returns: the state the match finished in, may be the none matching state
*
* For the label A//&B//&C this does the perm match for each of A and B and C
* @perms should be preinitialized with allperms OR a previous permission
@@ -1366,11 +1362,10 @@ static int label_components_match(struct aa_profile *profile,
}

/* no subcomponents visible - no change in perms */
- return 0;
+ return state;

next:
tmp = *aa_lookup_perms(rules->policy, state);
- aa_apply_modes_to_perms(profile, &tmp);
aa_perms_accum(perms, &tmp);
label_for_each_cont(i, label, tp) {
if (!aa_ns_visible(profile->ns, tp->ns, subns))
@@ -1379,18 +1374,17 @@ static int label_components_match(struct aa_profile *profile,
if (!state)
goto fail;
tmp = *aa_lookup_perms(rules->policy, state);
- aa_apply_modes_to_perms(profile, &tmp);
aa_perms_accum(perms, &tmp);
}

if ((perms->allow & request) != request)
- return -EACCES;
+ return DFA_NOMATCH;

- return 0;
+ return state;

fail:
*perms = nullperms;
- return -EACCES;
+ return DFA_NOMATCH;
}

/**
@@ -1409,11 +1403,12 @@ int aa_label_match(struct aa_profile *profile, struct aa_ruleset *rules,
struct aa_label *label, aa_state_t state, bool subns,
u32 request, struct aa_perms *perms)
{
- int error = label_compound_match(profile, rules, label, state, subns,
- request, perms);
- if (!error)
- return error;
+ aa_state_t tmp = label_compound_match(profile, rules, label, state, subns,
+ request, perms);
+ if ((perms->allow & request) == request)
+ return tmp;

+ /* failed compound_match try component matches */
*perms = allperms;
return label_components_match(profile, rules, label, state, subns,
request, perms);
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index b3f7a3258a2c..13c9bfdf65ff 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -519,33 +519,26 @@ static void apparmor_file_free_security(struct file *file)
aa_put_label(rcu_access_pointer(ctx->label));
}

-static int common_file_perm(const char *op, struct file *file, u32 mask,
- bool in_atomic)
+static int common_file_perm(const char *op, struct file *file, u32 mask)
{
struct aa_label *label;
int error = 0;
- bool needput;
-
- /* don't reaudit files closed during inheritance */
- if (unlikely(file->f_path.dentry == aa_null.dentry))
- return -EACCES;

- label = __begin_current_label_crit_section(&needput);
- error = aa_file_perm(op, current_cred(), label, file, mask, in_atomic);
- __end_current_label_crit_section(label, needput);
+ label = begin_current_label_crit_section();
+ error = aa_file_perm(op, current_cred(), label, file, mask, false);
+ end_current_label_crit_section(label);

return error;
}

static int apparmor_file_receive(struct file *file)
{
- return common_file_perm(OP_FRECEIVE, file, aa_map_file_to_perms(file),
- false);
+ return common_file_perm(OP_FRECEIVE, file, aa_map_file_to_perms(file));
}

static int apparmor_file_permission(struct file *file, int mask)
{
- return common_file_perm(OP_FPERM, file, mask, false);
+ return common_file_perm(OP_FPERM, file, mask);
}

static int apparmor_file_lock(struct file *file, unsigned int cmd)
@@ -555,11 +548,11 @@ static int apparmor_file_lock(struct file *file, unsigned int cmd)
if (cmd == F_WRLCK)
mask |= MAY_WRITE;

- return common_file_perm(OP_FLOCK, file, mask, false);
+ return common_file_perm(OP_FLOCK, file, mask);
}

static int common_mmap(const char *op, struct file *file, unsigned long prot,
- unsigned long flags, bool in_atomic)
+ unsigned long flags)
{
int mask = 0;

@@ -577,21 +570,20 @@ static int common_mmap(const char *op, struct file *file, unsigned long prot,
if (prot & PROT_EXEC)
mask |= AA_EXEC_MMAP;

- return common_file_perm(op, file, mask, in_atomic);
+ return common_file_perm(op, file, mask);
}

static int apparmor_mmap_file(struct file *file, unsigned long reqprot,
unsigned long prot, unsigned long flags)
{
- return common_mmap(OP_FMMAP, file, prot, flags, GFP_ATOMIC);
+ return common_mmap(OP_FMMAP, file, prot, flags);
}

static int apparmor_file_mprotect(struct vm_area_struct *vma,
unsigned long reqprot, unsigned long prot)
{
return common_mmap(OP_FMPROT, vma->vm_file, prot,
- !(vma->vm_flags & VM_SHARED) ? MAP_PRIVATE : 0,
- false);
+ !(vma->vm_flags & VM_SHARED) ? MAP_PRIVATE : 0);
}

#ifdef CONFIG_IO_URING
@@ -2144,7 +2136,8 @@ char *aa_get_buffer(bool in_atomic)
if (!list_empty(&cache->head)) {
aa_buf = list_first_entry(&cache->head, union aa_buffer, list);
list_del(&aa_buf->list);
- cache->hold--;
+ if (cache->hold)
+ cache->hold--;
cache->count--;
put_cpu_ptr(&aa_local_buffers);
return &aa_buf->buffer[0];
diff --git a/security/apparmor/match.c b/security/apparmor/match.c
index c5a91600842a..bbeb3be68572 100644
--- a/security/apparmor/match.c
+++ b/security/apparmor/match.c
@@ -15,6 +15,7 @@
#include <linux/vmalloc.h>
#include <linux/err.h>
#include <linux/kref.h>
+#include <linux/unaligned.h>

#include "include/lib.h"
#include "include/match.h"
@@ -42,11 +43,11 @@ static struct table_header *unpack_table(char *blob, size_t bsize)
/* loaded td_id's start at 1, subtract 1 now to avoid doing
* it every time we use td_id as an index
*/
- th.td_id = be16_to_cpu(*(__be16 *) (blob)) - 1;
+ th.td_id = get_unaligned_be16(blob) - 1;
if (th.td_id > YYTD_ID_MAX)
goto out;
- th.td_flags = be16_to_cpu(*(__be16 *) (blob + 2));
- th.td_lolen = be32_to_cpu(*(__be32 *) (blob + 8));
+ th.td_flags = get_unaligned_be16(blob + 2);
+ th.td_lolen = get_unaligned_be32(blob + 8);
blob += sizeof(struct table_header);

if (!(th.td_flags == YYTD_DATA16 || th.td_flags == YYTD_DATA32 ||
@@ -66,14 +67,13 @@ static struct table_header *unpack_table(char *blob, size_t bsize)
table->td_flags = th.td_flags;
table->td_lolen = th.td_lolen;
if (th.td_flags == YYTD_DATA8)
- UNPACK_ARRAY(table->td_data, blob, th.td_lolen,
- u8, u8, byte_to_byte);
+ memcpy(table->td_data, blob, th.td_lolen);
else if (th.td_flags == YYTD_DATA16)
UNPACK_ARRAY(table->td_data, blob, th.td_lolen,
- u16, __be16, be16_to_cpu);
+ u16, __be16, get_unaligned_be16);
else if (th.td_flags == YYTD_DATA32)
UNPACK_ARRAY(table->td_data, blob, th.td_lolen,
- u32, __be32, be32_to_cpu);
+ u32, __be32, get_unaligned_be32);
else
goto fail;
/* if table was vmalloced make sure the page tables are synced
@@ -313,14 +313,14 @@ struct aa_dfa *aa_dfa_unpack(void *blob, size_t size, int flags)
if (size < sizeof(struct table_set_header))
goto fail;

- if (ntohl(*(__be32 *) data) != YYTH_MAGIC)
+ if (get_unaligned_be32(data) != YYTH_MAGIC)
goto fail;

- hsize = ntohl(*(__be32 *) (data + 4));
+ hsize = get_unaligned_be32(data + 4);
if (size < hsize)
goto fail;

- dfa->flags = ntohs(*(__be16 *) (data + 12));
+ dfa->flags = get_unaligned_be16(data + 12);
if (dfa->flags & ~(YYTH_FLAGS))
goto fail;

@@ -329,7 +329,7 @@ struct aa_dfa *aa_dfa_unpack(void *blob, size_t size, int flags)
* if (dfa->flags & YYTH_FLAGS_OOB_TRANS) {
* if (hsize < 16 + 4)
* goto fail;
- * dfa->max_oob = ntol(*(__be32 *) (data + 16));
+ * dfa->max_oob = get_unaligned_be32(data + 16);
* if (dfa->max <= MAX_OOB_SUPPORTED) {
* pr_err("AppArmor DFA OOB greater than supported\n");
* goto fail;
diff --git a/security/apparmor/net.c b/security/apparmor/net.c
index 45cf25605c34..44c04102062f 100644
--- a/security/apparmor/net.c
+++ b/security/apparmor/net.c
@@ -326,8 +326,10 @@ int aa_sock_file_perm(const struct cred *subj_cred, struct aa_label *label,
struct socket *sock = (struct socket *) file->private_data;

AA_BUG(!label);
- AA_BUG(!sock);
- AA_BUG(!sock->sk);
+
+ /* sock && sock->sk can be NULL for sockets being set up or torn down */
+ if (!sock || !sock->sk)
+ return 0;

if (sock->sk->sk_family == PF_UNIX)
return aa_unix_file_perm(subj_cred, label, op, request, file);
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 7523971e37d9..dd602bd5fca9 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -687,8 +687,10 @@ static ssize_t unpack_perms_table(struct aa_ext *e, struct aa_perms **perms)
if (!aa_unpack_array(e, NULL, &size))
goto fail_reset;
*perms = kcalloc(size, sizeof(struct aa_perms), GFP_KERNEL);
- if (!*perms)
- goto fail_reset;
+ if (!*perms) {
+ e->pos = pos;
+ return -ENOMEM;
+ }
for (i = 0; i < size; i++) {
if (!unpack_perm(e, version, &(*perms)[i]))
goto fail;
diff --git a/security/apparmor/resource.c b/security/apparmor/resource.c
index 8e80db3ae21c..64212b39ba4b 100644
--- a/security/apparmor/resource.c
+++ b/security/apparmor/resource.c
@@ -196,6 +196,11 @@ void __aa_transition_rlimits(struct aa_label *old_l, struct aa_label *new_l)
rules->rlimits.limits[j].rlim_max);
/* soft limit should not exceed hard limit */
rlim->rlim_cur = min(rlim->rlim_cur, rlim->rlim_max);
+ if (j == RLIMIT_CPU &&
+ rlim->rlim_cur != RLIM_INFINITY &&
+ IS_ENABLED(CONFIG_POSIX_TIMERS))
+ (void) update_rlimit_cpu(current->group_leader,
+ rlim->rlim_cur);
}
}
}
diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c
index a5e730ffda57..5a8cef45bacf 100644
--- a/security/integrity/evm/evm_crypto.c
+++ b/security/integrity/evm/evm_crypto.c
@@ -401,6 +401,7 @@ int evm_init_hmac(struct inode *inode, const struct xattr *xattrs,
{
struct shash_desc *desc;
const struct xattr *xattr;
+ struct xattr_list *xattr_entry;

desc = init_desc(EVM_XATTR_HMAC, HASH_ALGO_SHA1);
if (IS_ERR(desc)) {
@@ -408,11 +409,16 @@ int evm_init_hmac(struct inode *inode, const struct xattr *xattrs,
return PTR_ERR(desc);
}

- for (xattr = xattrs; xattr->name; xattr++) {
- if (!evm_protected_xattr(xattr->name))
- continue;
+ list_for_each_entry_lockless(xattr_entry, &evm_config_xattrnames,
+ list) {
+ for (xattr = xattrs; xattr->name; xattr++) {
+ if (strcmp(xattr_entry->name +
+ XATTR_SECURITY_PREFIX_LEN, xattr->name) != 0)
+ continue;

- crypto_shash_update(desc, xattr->value, xattr->value_len);
+ crypto_shash_update(desc, xattr->value,
+ xattr->value_len);
+ }
}

hmac_add_misc(desc, inode, EVM_XATTR_HMAC, hmac_val);
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index b1e5e62f5cbd..d27d9140dda2 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -70,6 +70,7 @@ enum smk_inos {
static DEFINE_MUTEX(smack_cipso_lock);
static DEFINE_MUTEX(smack_ambient_lock);
static DEFINE_MUTEX(smk_net4addr_lock);
+static DEFINE_MUTEX(smk_cipso_doi_lock);
#if IS_ENABLED(CONFIG_IPV6)
static DEFINE_MUTEX(smk_net6addr_lock);
#endif /* CONFIG_IPV6 */
@@ -141,7 +142,7 @@ struct smack_parsed_rule {
int smk_access2;
};

-static int smk_cipso_doi_value = SMACK_CIPSO_DOI_DEFAULT;
+static u32 smk_cipso_doi_value = CIPSO_V4_DOI_UNKNOWN;

/*
* Values for parsing cipso rules
@@ -663,43 +664,60 @@ static const struct file_operations smk_load_ops = {
};

/**
- * smk_cipso_doi - initialize the CIPSO domain
+ * smk_cipso_doi - set netlabel maps
+ * @ndoi: new value for our CIPSO DOI
+ * @gfp_flags: kmalloc allocation context
*/
-static void smk_cipso_doi(void)
+static int
+smk_cipso_doi(u32 ndoi, gfp_t gfp_flags)
{
- int rc;
+ int rc = 0;
struct cipso_v4_doi *doip;
struct netlbl_audit nai;

- smk_netlabel_audit_set(&nai);
+ mutex_lock(&smk_cipso_doi_lock);

- rc = netlbl_cfg_map_del(NULL, PF_INET, NULL, NULL, &nai);
- if (rc != 0)
- printk(KERN_WARNING "%s:%d remove rc = %d\n",
- __func__, __LINE__, rc);
+ if (smk_cipso_doi_value == ndoi)
+ goto clr_doi_lock;
+
+ smk_netlabel_audit_set(&nai);

- doip = kmalloc(sizeof(struct cipso_v4_doi), GFP_KERNEL | __GFP_NOFAIL);
+ doip = kmalloc(sizeof(struct cipso_v4_doi), gfp_flags);
+ if (!doip) {
+ rc = -ENOMEM;
+ goto clr_doi_lock;
+ }
doip->map.std = NULL;
- doip->doi = smk_cipso_doi_value;
+ doip->doi = ndoi;
doip->type = CIPSO_V4_MAP_PASS;
doip->tags[0] = CIPSO_V4_TAG_RBITMAP;
for (rc = 1; rc < CIPSO_V4_TAG_MAXCNT; rc++)
doip->tags[rc] = CIPSO_V4_TAG_INVALID;

rc = netlbl_cfg_cipsov4_add(doip, &nai);
- if (rc != 0) {
- printk(KERN_WARNING "%s:%d cipso add rc = %d\n",
- __func__, __LINE__, rc);
+ if (rc) {
kfree(doip);
- return;
+ goto clr_doi_lock;
}
- rc = netlbl_cfg_cipsov4_map_add(doip->doi, NULL, NULL, NULL, &nai);
- if (rc != 0) {
- printk(KERN_WARNING "%s:%d map add rc = %d\n",
- __func__, __LINE__, rc);
- netlbl_cfg_cipsov4_del(doip->doi, &nai);
- return;
+
+ if (smk_cipso_doi_value != CIPSO_V4_DOI_UNKNOWN) {
+ rc = netlbl_cfg_map_del(NULL, PF_INET, NULL, NULL, &nai);
+ if (rc && rc != -ENOENT)
+ goto clr_ndoi_def;
+
+ netlbl_cfg_cipsov4_del(smk_cipso_doi_value, &nai);
}
+
+ rc = netlbl_cfg_cipsov4_map_add(ndoi, NULL, NULL, NULL, &nai);
+ if (rc) {
+ smk_cipso_doi_value = CIPSO_V4_DOI_UNKNOWN; // no default map
+clr_ndoi_def: netlbl_cfg_cipsov4_del(ndoi, &nai);
+ } else
+ smk_cipso_doi_value = ndoi;
+
+clr_doi_lock:
+ mutex_unlock(&smk_cipso_doi_lock);
+ return rc;
}

/**
@@ -1562,7 +1580,7 @@ static ssize_t smk_read_doi(struct file *filp, char __user *buf,
if (*ppos != 0)
return 0;

- sprintf(temp, "%d", smk_cipso_doi_value);
+ sprintf(temp, "%lu", (unsigned long)smk_cipso_doi_value);
rc = simple_read_from_buffer(buf, count, ppos, temp, strlen(temp));

return rc;
@@ -1581,7 +1599,7 @@ static ssize_t smk_write_doi(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
char temp[80];
- int i;
+ unsigned long u;

if (!smack_privileged(CAP_MAC_ADMIN))
return -EPERM;
@@ -1594,14 +1612,13 @@ static ssize_t smk_write_doi(struct file *file, const char __user *buf,

temp[count] = '\0';

- if (sscanf(temp, "%d", &i) != 1)
+ if (kstrtoul(temp, 10, &u))
return -EINVAL;

- smk_cipso_doi_value = i;
-
- smk_cipso_doi();
+ if (u == CIPSO_V4_DOI_UNKNOWN || u > U32_MAX)
+ return -EINVAL;

- return count;
+ return smk_cipso_doi(u, GFP_KERNEL) ? : count;
}

static const struct file_operations smk_doi_ops = {
@@ -2982,6 +2999,7 @@ static int __init init_smk_fs(void)
{
int err;
int rc;
+ struct netlbl_audit nai;

if (smack_enabled == 0)
return 0;
@@ -3000,7 +3018,10 @@ static int __init init_smk_fs(void)
}
}

- smk_cipso_doi();
+ smk_netlabel_audit_set(&nai);
+ (void) netlbl_cfg_map_del(NULL, PF_INET, NULL, NULL, &nai);
+ (void) smk_cipso_doi(SMACK_CIPSO_DOI_DEFAULT,
+ GFP_KERNEL | __GFP_NOFAIL);
smk_unlbl_ambient(NULL);

rc = smack_populate_secattr(&smack_known_floor);
diff --git a/sound/core/pcm.c b/sound/core/pcm.c
index 283aac441fa0..0b512085eb63 100644
--- a/sound/core/pcm.c
+++ b/sound/core/pcm.c
@@ -328,13 +328,13 @@ static const char *snd_pcm_oss_format_name(int format)
static void snd_pcm_proc_info_read(struct snd_pcm_substream *substream,
struct snd_info_buffer *buffer)
{
- struct snd_pcm_info *info __free(kfree) = NULL;
int err;

if (! substream)
return;

- info = kmalloc(sizeof(*info), GFP_KERNEL);
+ struct snd_pcm_info *info __free(kfree) =
+ kmalloc(sizeof(*info), GFP_KERNEL);
if (!info)
return;

diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c
index 54eb9bd8eb21..e86f68f1f23c 100644
--- a/sound/core/pcm_compat.c
+++ b/sound/core/pcm_compat.c
@@ -235,7 +235,6 @@ static int snd_pcm_ioctl_hw_params_compat(struct snd_pcm_substream *substream,
int refine,
struct snd_pcm_hw_params32 __user *data32)
{
- struct snd_pcm_hw_params *data __free(kfree) = NULL;
struct snd_pcm_runtime *runtime;
int err;

@@ -243,7 +242,8 @@ static int snd_pcm_ioctl_hw_params_compat(struct snd_pcm_substream *substream,
if (!runtime)
return -ENOTTY;

- data = kmalloc(sizeof(*data), GFP_KERNEL);
+ struct snd_pcm_hw_params *data __free(kfree) =
+ kmalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;

@@ -332,7 +332,6 @@ static int snd_pcm_ioctl_xfern_compat(struct snd_pcm_substream *substream,
compat_caddr_t buf;
compat_caddr_t __user *bufptr;
u32 frames;
- void __user **bufs __free(kfree) = NULL;
int err, ch, i;

if (! substream->runtime)
@@ -349,7 +348,9 @@ static int snd_pcm_ioctl_xfern_compat(struct snd_pcm_substream *substream,
get_user(frames, &data32->frames))
return -EFAULT;
bufptr = compat_ptr(buf);
- bufs = kmalloc_array(ch, sizeof(void __user *), GFP_KERNEL);
+
+ void __user **bufs __free(kfree) =
+ kmalloc_array(ch, sizeof(void __user *), GFP_KERNEL);
if (bufs == NULL)
return -ENOMEM;
for (i = 0; i < ch; i++) {
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 932a9bf98cbc..844ee1b4d286 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -242,10 +242,10 @@ int snd_pcm_info(struct snd_pcm_substream *substream, struct snd_pcm_info *info)
int snd_pcm_info_user(struct snd_pcm_substream *substream,
struct snd_pcm_info __user * _info)
{
- struct snd_pcm_info *info __free(kfree) = NULL;
int err;
+ struct snd_pcm_info *info __free(kfree) =
+ kmalloc(sizeof(*info), GFP_KERNEL);

- info = kmalloc(sizeof(*info), GFP_KERNEL);
if (! info)
return -ENOMEM;
err = snd_pcm_info(substream, info);
@@ -364,7 +364,6 @@ static int constrain_params_by_rules(struct snd_pcm_substream *substream,
struct snd_pcm_hw_constraints *constrs =
&substream->runtime->hw_constraints;
unsigned int k;
- unsigned int *rstamps __free(kfree) = NULL;
unsigned int vstamps[SNDRV_PCM_HW_PARAM_LAST_INTERVAL + 1];
unsigned int stamp;
struct snd_pcm_hw_rule *r;
@@ -380,7 +379,8 @@ static int constrain_params_by_rules(struct snd_pcm_substream *substream,
* Each member of 'rstamps' array represents the sequence number of
* recent application of corresponding rule.
*/
- rstamps = kcalloc(constrs->rules_num, sizeof(unsigned int), GFP_KERNEL);
+ unsigned int *rstamps __free(kfree) =
+ kcalloc(constrs->rules_num, sizeof(unsigned int), GFP_KERNEL);
if (!rstamps)
return -ENOMEM;

@@ -583,10 +583,10 @@ EXPORT_SYMBOL(snd_pcm_hw_refine);
static int snd_pcm_hw_refine_user(struct snd_pcm_substream *substream,
struct snd_pcm_hw_params __user * _params)
{
- struct snd_pcm_hw_params *params __free(kfree) = NULL;
int err;
+ struct snd_pcm_hw_params *params __free(kfree) =
+ memdup_user(_params, sizeof(*params));

- params = memdup_user(_params, sizeof(*params));
if (IS_ERR(params))
return PTR_ERR(params);

@@ -889,10 +889,10 @@ static int snd_pcm_hw_params(struct snd_pcm_substream *substream,
static int snd_pcm_hw_params_user(struct snd_pcm_substream *substream,
struct snd_pcm_hw_params __user * _params)
{
- struct snd_pcm_hw_params *params __free(kfree) = NULL;
int err;
+ struct snd_pcm_hw_params *params __free(kfree) =
+ memdup_user(_params, sizeof(*params));

- params = memdup_user(_params, sizeof(*params));
if (IS_ERR(params))
return PTR_ERR(params);

@@ -2267,7 +2267,6 @@ static int snd_pcm_link(struct snd_pcm_substream *substream, int fd)
{
struct snd_pcm_file *pcm_file;
struct snd_pcm_substream *substream1;
- struct snd_pcm_group *group __free(kfree) = NULL;
struct snd_pcm_group *target_group;
bool nonatomic = substream->pcm->nonatomic;
CLASS(fd, f)(fd);
@@ -2283,7 +2282,8 @@ static int snd_pcm_link(struct snd_pcm_substream *substream, int fd)
if (substream == substream1)
return -EINVAL;

- group = kzalloc(sizeof(*group), GFP_KERNEL);
+ struct snd_pcm_group *group __free(kfree) =
+ kzalloc(sizeof(*group), GFP_KERNEL);
if (!group)
return -ENOMEM;
snd_pcm_group_init(group);
@@ -3291,7 +3291,6 @@ static int snd_pcm_xfern_frames_ioctl(struct snd_pcm_substream *substream,
{
struct snd_xfern xfern;
struct snd_pcm_runtime *runtime = substream->runtime;
- void *bufs __free(kfree) = NULL;
snd_pcm_sframes_t result;

if (runtime->state == SNDRV_PCM_STATE_OPEN)
@@ -3303,7 +3302,8 @@ static int snd_pcm_xfern_frames_ioctl(struct snd_pcm_substream *substream,
if (copy_from_user(&xfern, _xfern, sizeof(xfern)))
return -EFAULT;

- bufs = memdup_array_user(xfern.bufs, runtime->channels, sizeof(void *));
+ void *bufs __free(kfree) =
+ memdup_array_user(xfern.bufs, runtime->channels, sizeof(void *));
if (IS_ERR(bufs))
return PTR_ERR(bufs);
if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
@@ -3577,7 +3577,6 @@ static ssize_t snd_pcm_readv(struct kiocb *iocb, struct iov_iter *to)
struct snd_pcm_runtime *runtime;
snd_pcm_sframes_t result;
unsigned long i;
- void __user **bufs __free(kfree) = NULL;
snd_pcm_uframes_t frames;
const struct iovec *iov = iter_iov(to);

@@ -3596,7 +3595,9 @@ static ssize_t snd_pcm_readv(struct kiocb *iocb, struct iov_iter *to)
if (!frame_aligned(runtime, iov->iov_len))
return -EINVAL;
frames = bytes_to_samples(runtime, iov->iov_len);
- bufs = kmalloc_array(to->nr_segs, sizeof(void *), GFP_KERNEL);
+
+ void __user **bufs __free(kfree) =
+ kmalloc_array(to->nr_segs, sizeof(void *), GFP_KERNEL);
if (bufs == NULL)
return -ENOMEM;
for (i = 0; i < to->nr_segs; ++i) {
@@ -3616,7 +3617,6 @@ static ssize_t snd_pcm_writev(struct kiocb *iocb, struct iov_iter *from)
struct snd_pcm_runtime *runtime;
snd_pcm_sframes_t result;
unsigned long i;
- void __user **bufs __free(kfree) = NULL;
snd_pcm_uframes_t frames;
const struct iovec *iov = iter_iov(from);

@@ -3634,7 +3634,9 @@ static ssize_t snd_pcm_writev(struct kiocb *iocb, struct iov_iter *from)
!frame_aligned(runtime, iov->iov_len))
return -EINVAL;
frames = bytes_to_samples(runtime, iov->iov_len);
- bufs = kmalloc_array(from->nr_segs, sizeof(void *), GFP_KERNEL);
+
+ void __user **bufs __free(kfree) =
+ kmalloc_array(from->nr_segs, sizeof(void *), GFP_KERNEL);
if (bufs == NULL)
return -ENOMEM;
for (i = 0; i < from->nr_segs; ++i) {
@@ -4106,15 +4108,15 @@ static void snd_pcm_hw_convert_to_old_params(struct snd_pcm_hw_params_old *opara
static int snd_pcm_hw_refine_old_user(struct snd_pcm_substream *substream,
struct snd_pcm_hw_params_old __user * _oparams)
{
- struct snd_pcm_hw_params *params __free(kfree) = NULL;
- struct snd_pcm_hw_params_old *oparams __free(kfree) = NULL;
int err;

- params = kmalloc(sizeof(*params), GFP_KERNEL);
+ struct snd_pcm_hw_params *params __free(kfree) =
+ kmalloc(sizeof(*params), GFP_KERNEL);
if (!params)
return -ENOMEM;

- oparams = memdup_user(_oparams, sizeof(*oparams));
+ struct snd_pcm_hw_params_old *oparams __free(kfree) =
+ memdup_user(_oparams, sizeof(*oparams));
if (IS_ERR(oparams))
return PTR_ERR(oparams);
snd_pcm_hw_convert_from_old_params(params, oparams);
@@ -4135,15 +4137,15 @@ static int snd_pcm_hw_refine_old_user(struct snd_pcm_substream *substream,
static int snd_pcm_hw_params_old_user(struct snd_pcm_substream *substream,
struct snd_pcm_hw_params_old __user * _oparams)
{
- struct snd_pcm_hw_params *params __free(kfree) = NULL;
- struct snd_pcm_hw_params_old *oparams __free(kfree) = NULL;
int err;

- params = kmalloc(sizeof(*params), GFP_KERNEL);
+ struct snd_pcm_hw_params *params __free(kfree) =
+ kmalloc(sizeof(*params), GFP_KERNEL);
if (!params)
return -ENOMEM;

- oparams = memdup_user(_oparams, sizeof(*oparams));
+ struct snd_pcm_hw_params_old *oparams __free(kfree) =
+ memdup_user(_oparams, sizeof(*oparams));
if (IS_ERR(oparams))
return PTR_ERR(oparams);

diff --git a/sound/core/vmaster.c b/sound/core/vmaster.c
index c657659b236c..76cc64245f5d 100644
--- a/sound/core/vmaster.c
+++ b/sound/core/vmaster.c
@@ -56,10 +56,10 @@ struct link_follower {

static int follower_update(struct link_follower *follower)
{
- struct snd_ctl_elem_value *uctl __free(kfree) = NULL;
int err, ch;
+ struct snd_ctl_elem_value *uctl __free(kfree) =
+ kzalloc(sizeof(*uctl), GFP_KERNEL);

- uctl = kzalloc(sizeof(*uctl), GFP_KERNEL);
if (!uctl)
return -ENOMEM;
uctl->id = follower->follower.id;
@@ -74,7 +74,6 @@ static int follower_update(struct link_follower *follower)
/* get the follower ctl info and save the initial values */
static int follower_init(struct link_follower *follower)
{
- struct snd_ctl_elem_info *uinfo __free(kfree) = NULL;
int err;

if (follower->info.count) {
@@ -84,7 +83,8 @@ static int follower_init(struct link_follower *follower)
return 0;
}

- uinfo = kmalloc(sizeof(*uinfo), GFP_KERNEL);
+ struct snd_ctl_elem_info *uinfo __free(kfree) =
+ kmalloc(sizeof(*uinfo), GFP_KERNEL);
if (!uinfo)
return -ENOMEM;
uinfo->id = follower->follower.id;
@@ -341,9 +341,9 @@ static int master_get(struct snd_kcontrol *kcontrol,
static int sync_followers(struct link_master *master, int old_val, int new_val)
{
struct link_follower *follower;
- struct snd_ctl_elem_value *uval __free(kfree) = NULL;
+ struct snd_ctl_elem_value *uval __free(kfree) =
+ kmalloc(sizeof(*uval), GFP_KERNEL);

- uval = kmalloc(sizeof(*uval), GFP_KERNEL);
if (!uval)
return -ENOMEM;
list_for_each_entry(follower, &master->followers, list) {
diff --git a/sound/hda/codecs/conexant.c b/sound/hda/codecs/conexant.c
index d6fba7460301..0c517378a6d2 100644
--- a/sound/hda/codecs/conexant.c
+++ b/sound/hda/codecs/conexant.c
@@ -299,6 +299,7 @@ enum {
CXT_PINCFG_SWS_JS201D,
CXT_PINCFG_TOP_SPEAKER,
CXT_FIXUP_HP_A_U,
+ CXT_FIXUP_ACER_SWIFT_HP,
};

/* for hda_fixup_thinkpad_acpi() */
@@ -1024,6 +1025,14 @@ static const struct hda_fixup cxt_fixups[] = {
.type = HDA_FIXUP_FUNC,
.v.func = cxt_fixup_hp_a_u,
},
+ [CXT_FIXUP_ACER_SWIFT_HP] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x16, 0x0321403f }, /* Headphone */
+ { 0x19, 0x40f001f0 }, /* Mic */
+ { }
+ },
+ },
};

static const struct hda_quirk cxt5045_fixups[] = {
@@ -1073,6 +1082,7 @@ static const struct hda_quirk cxt5066_fixups[] = {
SND_PCI_QUIRK(0x1025, 0x0543, "Acer Aspire One 522", CXT_FIXUP_STEREO_DMIC),
SND_PCI_QUIRK(0x1025, 0x054c, "Acer Aspire 3830TG", CXT_FIXUP_ASPIRE_DMIC),
SND_PCI_QUIRK(0x1025, 0x054f, "Acer Aspire 4830T", CXT_FIXUP_ASPIRE_DMIC),
+ SND_PCI_QUIRK(0x1025, 0x136d, "Acer Swift SF314", CXT_FIXUP_ACER_SWIFT_HP),
SND_PCI_QUIRK(0x103c, 0x8079, "HP EliteBook 840 G3", CXT_FIXUP_HP_DOCK),
SND_PCI_QUIRK(0x103c, 0x807C, "HP EliteBook 820 G3", CXT_FIXUP_HP_DOCK),
SND_PCI_QUIRK(0x103c, 0x80FD, "HP ProBook 640 G2", CXT_FIXUP_HP_DOCK),
diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c
index a16cb45ac59e..66da4584aa7a 100644
--- a/sound/hda/codecs/realtek/alc269.c
+++ b/sound/hda/codecs/realtek/alc269.c
@@ -6951,7 +6951,9 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x144d, 0xc872, "Samsung Galaxy Book2 Pro (NP950XEE)", ALC298_FIXUP_SAMSUNG_AMP_V2_2_AMPS),
SND_PCI_QUIRK(0x144d, 0xc886, "Samsung Galaxy Book3 Pro (NP964XFG)", ALC298_FIXUP_SAMSUNG_AMP_V2_4_AMPS),
SND_PCI_QUIRK(0x144d, 0xc1ca, "Samsung Galaxy Book3 Pro 360 (NP960QFG)", ALC298_FIXUP_SAMSUNG_AMP_V2_4_AMPS),
+ SND_PCI_QUIRK(0x144d, 0xc1cb, "Samsung Galaxy Book3 Pro 360 (NP965QFG)", ALC298_FIXUP_SAMSUNG_AMP_V2_4_AMPS),
SND_PCI_QUIRK(0x144d, 0xc1cc, "Samsung Galaxy Book3 Ultra (NT960XFH)", ALC298_FIXUP_SAMSUNG_AMP_V2_4_AMPS),
+ SND_PCI_QUIRK(0x1458, 0x900e, "Gigabyte G5 KF5 (2023)", ALC2XX_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC),
diff --git a/sound/soc/codecs/aw88261.c b/sound/soc/codecs/aw88261.c
index de11ae8dd9d9..124c0a58d08b 100644
--- a/sound/soc/codecs/aw88261.c
+++ b/sound/soc/codecs/aw88261.c
@@ -423,9 +423,10 @@ static int aw88261_dev_reg_update(struct aw88261 *aw88261,
if (ret)
break;

+ /* keep all three bits from current hw status */
read_val &= (~AW88261_AMPPD_MASK) | (~AW88261_PWDN_MASK) |
(~AW88261_HMUTE_MASK);
- reg_val &= (AW88261_AMPPD_MASK | AW88261_PWDN_MASK | AW88261_HMUTE_MASK);
+ reg_val &= (AW88261_AMPPD_MASK & AW88261_PWDN_MASK & AW88261_HMUTE_MASK);
reg_val |= read_val;

/* enable uls hmute */
diff --git a/sound/soc/codecs/nau8821.c b/sound/soc/codecs/nau8821.c
index 4fa9a785513e..dfb9630bffe2 100644
--- a/sound/soc/codecs/nau8821.c
+++ b/sound/soc/codecs/nau8821.c
@@ -1661,8 +1661,13 @@ int nau8821_enable_jack_detect(struct snd_soc_component *component,
int ret;

nau8821->jack = jack;
+
+ if (nau8821->jdet_active)
+ return 0;
+
/* Initiate jack detection work queue */
INIT_DELAYED_WORK(&nau8821->jdet_work, nau8821_jdet_work);
+ nau8821->jdet_active = true;

ret = devm_request_threaded_irq(nau8821->dev, nau8821->irq, NULL,
nau8821_interrupt, IRQF_TRIGGER_LOW | IRQF_ONESHOT,
diff --git a/sound/soc/codecs/nau8821.h b/sound/soc/codecs/nau8821.h
index 88602923780d..f9d7cd8cbd21 100644
--- a/sound/soc/codecs/nau8821.h
+++ b/sound/soc/codecs/nau8821.h
@@ -562,6 +562,7 @@ struct nau8821 {
struct snd_soc_dapm_context *dapm;
struct snd_soc_jack *jack;
struct delayed_work jdet_work;
+ bool jdet_active;
int irq;
int clk_id;
int micbias_voltage;
diff --git a/sound/soc/fsl/fsl_xcvr.c b/sound/soc/fsl/fsl_xcvr.c
index 51669e5fe888..58db4906a01d 100644
--- a/sound/soc/fsl/fsl_xcvr.c
+++ b/sound/soc/fsl/fsl_xcvr.c
@@ -223,13 +223,10 @@ static int fsl_xcvr_mode_put(struct snd_kcontrol *kcontrol,

xcvr->mode = snd_soc_enum_item_to_val(e, item[0]);

- down_read(&card->snd_card->controls_rwsem);
fsl_xcvr_activate_ctl(dai, fsl_xcvr_arc_mode_kctl.name,
(xcvr->mode == FSL_XCVR_MODE_ARC));
fsl_xcvr_activate_ctl(dai, fsl_xcvr_earc_capds_kctl.name,
(xcvr->mode == FSL_XCVR_MODE_EARC));
- up_read(&card->snd_card->controls_rwsem);
-
/* Allow playback for SPDIF only */
rtd = snd_soc_get_pcm_runtime(card, card->dai_link);
rtd->pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream_count =
diff --git a/sound/soc/rockchip/rockchip_i2s_tdm.c b/sound/soc/rockchip/rockchip_i2s_tdm.c
index d9a1fab7f403..e4697ee0addc 100644
--- a/sound/soc/rockchip/rockchip_i2s_tdm.c
+++ b/sound/soc/rockchip/rockchip_i2s_tdm.c
@@ -22,6 +22,7 @@

#define DRV_NAME "rockchip-i2s-tdm"

+#define DEFAULT_MCLK_FS 256
#define CH_GRP_MAX 4 /* The max channel 8 / 2 */
#define MULTIPLEX_CH_MAX 10

@@ -665,6 +666,15 @@ static int rockchip_i2s_tdm_hw_params(struct snd_pcm_substream *substream,
mclk_rate = i2s_tdm->mclk_rx_freq;
}

+ /*
+ * When the dai/component driver doesn't need to set mclk-fs for a specific
+ * clock, it can skip the call to set_sysclk() for that clock.
+ * In that case, simply use the clock rate from the params and multiply it by
+ * the default mclk-fs value.
+ */
+ if (!mclk_rate)
+ mclk_rate = DEFAULT_MCLK_FS * params_rate(params);
+
err = clk_set_rate(mclk, mclk_rate);
if (err)
return err;
diff --git a/sound/soc/sdca/sdca_asoc.c b/sound/soc/sdca/sdca_asoc.c
index 892b7c028fae..197a592ec2f1 100644
--- a/sound/soc/sdca/sdca_asoc.c
+++ b/sound/soc/sdca/sdca_asoc.c
@@ -16,6 +16,7 @@
#include <linux/minmax.h>
#include <linux/module.h>
#include <linux/overflow.h>
+#include <linux/pm_runtime.h>
#include <linux/regmap.h>
#include <linux/soundwire/sdw_registers.h>
#include <linux/string_helpers.h>
@@ -836,6 +837,48 @@ static int control_limit_kctl(struct device *dev,
return 0;
}

+static int volatile_get_volsw(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *component = snd_kcontrol_chip(kcontrol);
+ struct device *dev = component->dev;
+ int ret;
+
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret < 0) {
+ dev_err(dev, "failed to resume reading %s: %d\n",
+ kcontrol->id.name, ret);
+ return ret;
+ }
+
+ ret = snd_soc_get_volsw(kcontrol, ucontrol);
+
+ pm_runtime_put(dev);
+
+ return ret;
+}
+
+static int volatile_put_volsw(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *component = snd_kcontrol_chip(kcontrol);
+ struct device *dev = component->dev;
+ int ret;
+
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret < 0) {
+ dev_err(dev, "failed to resume writing %s: %d\n",
+ kcontrol->id.name, ret);
+ return ret;
+ }
+
+ ret = snd_soc_put_volsw(kcontrol, ucontrol);
+
+ pm_runtime_put(dev);
+
+ return ret;
+}
+
static int populate_control(struct device *dev,
struct sdca_function_data *function,
struct sdca_entity *entity,
@@ -890,8 +933,13 @@ static int populate_control(struct device *dev,
(*kctl)->private_value = (unsigned long)mc;
(*kctl)->iface = SNDRV_CTL_ELEM_IFACE_MIXER;
(*kctl)->info = snd_soc_info_volsw;
- (*kctl)->get = snd_soc_get_volsw;
- (*kctl)->put = snd_soc_put_volsw;
+ if (control->is_volatile) {
+ (*kctl)->get = volatile_get_volsw;
+ (*kctl)->put = volatile_put_volsw;
+ } else {
+ (*kctl)->get = snd_soc_get_volsw;
+ (*kctl)->put = snd_soc_put_volsw;
+ }

if (readonly_control(control))
(*kctl)->access = SNDRV_CTL_ELEM_ACCESS_READ;
@@ -1519,7 +1567,7 @@ static int set_usage(struct device *dev, struct regmap *regmap,
unsigned int rate = sdca_range(range, SDCA_USAGE_SAMPLE_RATE, i);
unsigned int width = sdca_range(range, SDCA_USAGE_SAMPLE_WIDTH, i);

- if ((!rate || rate == target_rate) && width == target_width) {
+ if ((!rate || rate == target_rate) && (!width || width == target_width)) {
unsigned int usage = sdca_range(range, SDCA_USAGE_NUMBER, i);
unsigned int reg = SDW_SDCA_CTL(function->desc->adr,
entity->id, sel, 0);
diff --git a/sound/soc/sdca/sdca_functions.c b/sound/soc/sdca/sdca_functions.c
index 19b12564f822..4417278e39bb 100644
--- a/sound/soc/sdca/sdca_functions.c
+++ b/sound/soc/sdca/sdca_functions.c
@@ -779,6 +779,62 @@ find_sdca_control_datatype(const struct sdca_entity *entity,
}
}

+static bool find_sdca_control_volatile(const struct sdca_entity *entity,
+ const struct sdca_control *control)
+{
+ switch (control->mode) {
+ case SDCA_ACCESS_MODE_DC:
+ return false;
+ case SDCA_ACCESS_MODE_RO:
+ case SDCA_ACCESS_MODE_RW1S:
+ case SDCA_ACCESS_MODE_RW1C:
+ return true;
+ default:
+ break;
+ }
+
+ switch (SDCA_CTL_TYPE(entity->type, control->sel)) {
+ case SDCA_CTL_TYPE_S(XU, FDL_CURRENTOWNER):
+ case SDCA_CTL_TYPE_S(XU, FDL_MESSAGEOFFSET):
+ case SDCA_CTL_TYPE_S(XU, FDL_MESSAGELENGTH):
+ case SDCA_CTL_TYPE_S(XU, FDL_STATUS):
+ case SDCA_CTL_TYPE_S(XU, FDL_HOST_REQUEST):
+ case SDCA_CTL_TYPE_S(SPE, AUTHTX_CURRENTOWNER):
+ case SDCA_CTL_TYPE_S(SPE, AUTHTX_MESSAGEOFFSET):
+ case SDCA_CTL_TYPE_S(SPE, AUTHTX_MESSAGELENGTH):
+ case SDCA_CTL_TYPE_S(SPE, AUTHRX_CURRENTOWNER):
+ case SDCA_CTL_TYPE_S(SPE, AUTHRX_MESSAGEOFFSET):
+ case SDCA_CTL_TYPE_S(SPE, AUTHRX_MESSAGELENGTH):
+ case SDCA_CTL_TYPE_S(MFPU, AE_CURRENTOWNER):
+ case SDCA_CTL_TYPE_S(MFPU, AE_MESSAGEOFFSET):
+ case SDCA_CTL_TYPE_S(MFPU, AE_MESSAGELENGTH):
+ case SDCA_CTL_TYPE_S(SMPU, HIST_CURRENTOWNER):
+ case SDCA_CTL_TYPE_S(SMPU, HIST_MESSAGEOFFSET):
+ case SDCA_CTL_TYPE_S(SMPU, HIST_MESSAGELENGTH):
+ case SDCA_CTL_TYPE_S(SMPU, DTODTX_CURRENTOWNER):
+ case SDCA_CTL_TYPE_S(SMPU, DTODTX_MESSAGEOFFSET):
+ case SDCA_CTL_TYPE_S(SMPU, DTODTX_MESSAGELENGTH):
+ case SDCA_CTL_TYPE_S(SMPU, DTODRX_CURRENTOWNER):
+ case SDCA_CTL_TYPE_S(SMPU, DTODRX_MESSAGEOFFSET):
+ case SDCA_CTL_TYPE_S(SMPU, DTODRX_MESSAGELENGTH):
+ case SDCA_CTL_TYPE_S(SAPU, DTODTX_CURRENTOWNER):
+ case SDCA_CTL_TYPE_S(SAPU, DTODTX_MESSAGEOFFSET):
+ case SDCA_CTL_TYPE_S(SAPU, DTODTX_MESSAGELENGTH):
+ case SDCA_CTL_TYPE_S(SAPU, DTODRX_CURRENTOWNER):
+ case SDCA_CTL_TYPE_S(SAPU, DTODRX_MESSAGEOFFSET):
+ case SDCA_CTL_TYPE_S(SAPU, DTODRX_MESSAGELENGTH):
+ case SDCA_CTL_TYPE_S(HIDE, HIDTX_CURRENTOWNER):
+ case SDCA_CTL_TYPE_S(HIDE, HIDTX_MESSAGEOFFSET):
+ case SDCA_CTL_TYPE_S(HIDE, HIDTX_MESSAGELENGTH):
+ case SDCA_CTL_TYPE_S(HIDE, HIDRX_CURRENTOWNER):
+ case SDCA_CTL_TYPE_S(HIDE, HIDRX_MESSAGEOFFSET):
+ case SDCA_CTL_TYPE_S(HIDE, HIDRX_MESSAGELENGTH):
+ return true;
+ default:
+ return false;
+ }
+}
+
static int find_sdca_control_range(struct device *dev,
struct fwnode_handle *control_node,
struct sdca_control_range *range)
@@ -854,10 +910,6 @@ static int find_sdca_control_value(struct device *dev, struct sdca_entity *entit
return 0;
}

-/*
- * TODO: Add support for -cn- properties, allowing different channels to have
- * different defaults etc.
- */
static int find_sdca_entity_control(struct device *dev, struct sdca_entity *entity,
struct fwnode_handle *control_node,
struct sdca_control *control)
@@ -931,6 +983,8 @@ static int find_sdca_entity_control(struct device *dev, struct sdca_entity *enti
break;
}

+ control->is_volatile = find_sdca_control_volatile(entity, control);
+
ret = find_sdca_control_range(dev, control_node, &control->range);
if (ret) {
dev_err(dev, "%s: control %#x: range missing: %d\n",
diff --git a/sound/soc/sdca/sdca_regmap.c b/sound/soc/sdca/sdca_regmap.c
index 72f893e00ff5..8fa138fca00f 100644
--- a/sound/soc/sdca/sdca_regmap.c
+++ b/sound/soc/sdca/sdca_regmap.c
@@ -147,14 +147,7 @@ bool sdca_regmap_volatile(struct sdca_function_data *function, unsigned int reg)
if (!control)
return false;

- switch (control->mode) {
- case SDCA_ACCESS_MODE_RO:
- case SDCA_ACCESS_MODE_RW1S:
- case SDCA_ACCESS_MODE_RW1C:
- return true;
- default:
- return false;
- }
+ return control->is_volatile;
}
EXPORT_SYMBOL_NS(sdca_regmap_volatile, "SND_SOC_SDCA");

diff --git a/sound/soc/tegra/tegra210_ahub.c b/sound/soc/tegra/tegra210_ahub.c
index 21aeaeba0b10..01d60a74ad1c 100644
--- a/sound/soc/tegra/tegra210_ahub.c
+++ b/sound/soc/tegra/tegra210_ahub.c
@@ -2049,6 +2049,61 @@ static const struct snd_soc_component_driver tegra264_ahub_component = {
.num_dapm_routes = ARRAY_SIZE(tegra264_ahub_routes),
};

+static bool tegra210_ahub_wr_reg(struct device *dev, unsigned int reg)
+{
+ int part;
+
+ if (reg % TEGRA210_XBAR_RX_STRIDE)
+ return false;
+
+ for (part = 0; part < TEGRA210_XBAR_UPDATE_MAX_REG; part++) {
+ switch (reg & ~(part * TEGRA210_XBAR_PART1_RX)) {
+ case TEGRA210_AXBAR_PART_0_ADMAIF_RX1_0 ... TEGRA210_AXBAR_PART_0_ADMAIF_RX10_0:
+ case TEGRA210_AXBAR_PART_0_I2S1_RX1_0 ... TEGRA210_AXBAR_PART_0_I2S5_RX1_0:
+ case TEGRA210_AXBAR_PART_0_SFC1_RX1_0 ... TEGRA210_AXBAR_PART_0_SFC4_RX1_0:
+ case TEGRA210_AXBAR_PART_0_MIXER1_RX1_0 ... TEGRA210_AXBAR_PART_0_MIXER1_RX10_0:
+ case TEGRA210_AXBAR_PART_0_SPDIF1_RX1_0 ... TEGRA210_AXBAR_PART_0_SPDIF1_RX2_0:
+ case TEGRA210_AXBAR_PART_0_AFC1_RX1_0 ... TEGRA210_AXBAR_PART_0_AFC6_RX1_0:
+ case TEGRA210_AXBAR_PART_0_OPE1_RX1_0 ... TEGRA210_AXBAR_PART_0_OPE2_RX1_0:
+ case TEGRA210_AXBAR_PART_0_SPKPROT1_RX1_0:
+ case TEGRA210_AXBAR_PART_0_MVC1_RX1_0 ... TEGRA210_AXBAR_PART_0_MVC2_RX1_0:
+ case TEGRA210_AXBAR_PART_0_AMX1_RX1_0 ... TEGRA210_AXBAR_PART_0_ADX2_RX1_0:
+ return true;
+ default:
+ break;
+ }
+ }
+
+ return false;
+}
+
+static bool tegra186_ahub_wr_reg(struct device *dev, unsigned int reg)
+{
+ int part;
+
+ if (reg % TEGRA210_XBAR_RX_STRIDE)
+ return false;
+
+ for (part = 0; part < TEGRA186_XBAR_UPDATE_MAX_REG; part++) {
+ switch (reg & ~(part * TEGRA210_XBAR_PART1_RX)) {
+ case TEGRA210_AXBAR_PART_0_ADMAIF_RX1_0 ... TEGRA186_AXBAR_PART_0_I2S6_RX1_0:
+ case TEGRA210_AXBAR_PART_0_SFC1_RX1_0 ... TEGRA210_AXBAR_PART_0_SFC4_RX1_0:
+ case TEGRA210_AXBAR_PART_0_MIXER1_RX1_0 ... TEGRA210_AXBAR_PART_0_MIXER1_RX10_0:
+ case TEGRA186_AXBAR_PART_0_DSPK1_RX1_0 ... TEGRA186_AXBAR_PART_0_DSPK2_RX1_0:
+ case TEGRA210_AXBAR_PART_0_AFC1_RX1_0 ... TEGRA210_AXBAR_PART_0_AFC6_RX1_0:
+ case TEGRA210_AXBAR_PART_0_OPE1_RX1_0:
+ case TEGRA186_AXBAR_PART_0_MVC1_RX1_0 ... TEGRA186_AXBAR_PART_0_MVC2_RX1_0:
+ case TEGRA186_AXBAR_PART_0_AMX1_RX1_0 ... TEGRA186_AXBAR_PART_0_AMX3_RX4_0:
+ case TEGRA210_AXBAR_PART_0_ADX1_RX1_0 ... TEGRA186_AXBAR_PART_0_ASRC1_RX7_0:
+ return true;
+ default:
+ break;
+ }
+ }
+
+ return false;
+}
+
static bool tegra264_ahub_wr_reg(struct device *dev, unsigned int reg)
{
int part;
@@ -2076,6 +2131,7 @@ static const struct regmap_config tegra210_ahub_regmap_config = {
.reg_bits = 32,
.val_bits = 32,
.reg_stride = 4,
+ .writeable_reg = tegra210_ahub_wr_reg,
.max_register = TEGRA210_MAX_REGISTER_ADDR,
.cache_type = REGCACHE_FLAT,
};
@@ -2084,6 +2140,7 @@ static const struct regmap_config tegra186_ahub_regmap_config = {
.reg_bits = 32,
.val_bits = 32,
.reg_stride = 4,
+ .writeable_reg = tegra186_ahub_wr_reg,
.max_register = TEGRA186_MAX_REGISTER_ADDR,
.cache_type = REGCACHE_FLAT,
};
diff --git a/sound/soc/tegra/tegra210_ahub.h b/sound/soc/tegra/tegra210_ahub.h
index f355b2cfd19b..acbe640dd3b5 100644
--- a/sound/soc/tegra/tegra210_ahub.h
+++ b/sound/soc/tegra/tegra210_ahub.h
@@ -68,6 +68,36 @@
#define TEGRA210_MAX_REGISTER_ADDR (TEGRA210_XBAR_PART2_RX + \
(TEGRA210_XBAR_RX_STRIDE * (TEGRA210_XBAR_AUDIO_RX_COUNT - 1)))

+/* AXBAR register offsets */
+#define TEGRA186_AXBAR_PART_0_AMX1_RX1_0 0x120
+#define TEGRA186_AXBAR_PART_0_AMX3_RX4_0 0x14c
+#define TEGRA186_AXBAR_PART_0_ASRC1_RX7_0 0x1a8
+#define TEGRA186_AXBAR_PART_0_DSPK1_RX1_0 0xc0
+#define TEGRA186_AXBAR_PART_0_DSPK2_RX1_0 0xc4
+#define TEGRA186_AXBAR_PART_0_I2S6_RX1_0 0x54
+#define TEGRA186_AXBAR_PART_0_MVC1_RX1_0 0x110
+#define TEGRA186_AXBAR_PART_0_MVC2_RX1_0 0x114
+#define TEGRA210_AXBAR_PART_0_ADMAIF_RX10_0 0x24
+#define TEGRA210_AXBAR_PART_0_ADMAIF_RX1_0 0x0
+#define TEGRA210_AXBAR_PART_0_ADX1_RX1_0 0x160
+#define TEGRA210_AXBAR_PART_0_ADX2_RX1_0 0x164
+#define TEGRA210_AXBAR_PART_0_AFC1_RX1_0 0xd0
+#define TEGRA210_AXBAR_PART_0_AFC6_RX1_0 0xe4
+#define TEGRA210_AXBAR_PART_0_AMX1_RX1_0 0x140
+#define TEGRA210_AXBAR_PART_0_I2S1_RX1_0 0x40
+#define TEGRA210_AXBAR_PART_0_I2S5_RX1_0 0x50
+#define TEGRA210_AXBAR_PART_0_MIXER1_RX10_0 0xa4
+#define TEGRA210_AXBAR_PART_0_MIXER1_RX1_0 0x80
+#define TEGRA210_AXBAR_PART_0_MVC1_RX1_0 0x120
+#define TEGRA210_AXBAR_PART_0_MVC2_RX1_0 0x124
+#define TEGRA210_AXBAR_PART_0_OPE1_RX1_0 0x100
+#define TEGRA210_AXBAR_PART_0_OPE2_RX1_0 0x104
+#define TEGRA210_AXBAR_PART_0_SFC1_RX1_0 0x60
+#define TEGRA210_AXBAR_PART_0_SFC4_RX1_0 0x6c
+#define TEGRA210_AXBAR_PART_0_SPDIF1_RX1_0 0xc0
+#define TEGRA210_AXBAR_PART_0_SPDIF1_RX2_0 0xc4
+#define TEGRA210_AXBAR_PART_0_SPKPROT1_RX1_0 0x110
+
#define MUX_REG(id) (TEGRA210_XBAR_RX_STRIDE * (id))

#define MUX_VALUE(npart, nbit) (1 + (nbit) + (npart) * 32)
diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
index cfc6f944f7c3..1a06b0b5eef3 100644
--- a/tools/bpf/bpftool/net.c
+++ b/tools/bpf/bpftool/net.c
@@ -156,7 +156,7 @@ static int netlink_recv(int sock, __u32 nl_pid, __u32 seq,
bool multipart = true;
struct nlmsgerr *err;
struct nlmsghdr *nh;
- char buf[4096];
+ char buf[8192];
int len, ret;

while (multipart) {
@@ -201,6 +201,9 @@ static int netlink_recv(int sock, __u32 nl_pid, __u32 seq,
return ret;
}
}
+
+ if (len)
+ p_err("Invalid message or trailing data in Netlink response: %d bytes left", len);
}
ret = 0;
done:
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 6388392f49a0..53c6624161d7 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -1762,9 +1762,18 @@ static int btf_dump_get_bitfield_value(struct btf_dump *d,
__u16 left_shift_bits, right_shift_bits;
const __u8 *bytes = data;
__u8 nr_copy_bits;
+ __u8 start_bit, nr_bytes;
__u64 num = 0;
int i;

+ /* Calculate how many bytes cover the bitfield */
+ start_bit = bits_offset % 8;
+ nr_bytes = (start_bit + bit_sz + 7) / 8;
+
+ /* Bound check */
+ if (data + nr_bytes > d->typed_dump->data_end)
+ return -E2BIG;
+
/* Maximum supported bitfield size is 64 bits */
if (t->size > 8) {
pr_warn("unexpected bitfield size %d\n", t->size);
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index 56ae77047bc3..b86b8ae3b634 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -581,7 +581,7 @@ int bpf_linker__add_buf(struct bpf_linker *linker, void *buf, size_t buf_sz,

written = 0;
while (written < buf_sz) {
- ret = write(fd, buf, buf_sz);
+ ret = write(fd, buf + written, buf_sz - written);
if (ret < 0) {
ret = -errno;
pr_warn("failed to write '%s': %s\n", filename, errstr(ret));
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index c997e69d507f..c9a78fb16f11 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -143,7 +143,7 @@ static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq,
struct nlmsghdr *nh;
int len, ret;

- ret = alloc_iov(&iov, 4096);
+ ret = alloc_iov(&iov, 8192);
if (ret)
goto done;

@@ -212,6 +212,8 @@ static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq,
}
}
}
+ if (len)
+ pr_warn("Invalid message or trailing data in Netlink response: %d bytes left\n", len);
}
ret = 0;
done:
diff --git a/tools/net/sunrpc/xdrgen/generators/__init__.py b/tools/net/sunrpc/xdrgen/generators/__init__.py
index b98574a36a4a..a2eb6652ac90 100644
--- a/tools/net/sunrpc/xdrgen/generators/__init__.py
+++ b/tools/net/sunrpc/xdrgen/generators/__init__.py
@@ -6,7 +6,7 @@ import sys
from jinja2 import Environment, FileSystemLoader, Template

from xdr_ast import _XdrAst, Specification, _RpcProgram, _XdrTypeSpecifier
-from xdr_ast import public_apis, pass_by_reference, get_header_name
+from xdr_ast import public_apis, pass_by_reference, structs, get_header_name
from xdr_parse import get_xdr_annotate


@@ -22,6 +22,7 @@ def create_jinja2_environment(language: str, xdr_type: str) -> Environment:
environment.globals["annotate"] = get_xdr_annotate()
environment.globals["public_apis"] = public_apis
environment.globals["pass_by_reference"] = pass_by_reference
+ environment.globals["structs"] = structs
return environment
case _:
raise NotImplementedError("Language not supported")
diff --git a/tools/net/sunrpc/xdrgen/templates/C/program/decoder/argument.j2 b/tools/net/sunrpc/xdrgen/templates/C/program/decoder/argument.j2
index 0b1709cca0d4..19b219dd276d 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/program/decoder/argument.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/program/decoder/argument.j2
@@ -14,7 +14,11 @@ bool {{ program }}_svc_decode_{{ argument }}(struct svc_rqst *rqstp, struct xdr_
{% if argument == 'void' %}
return xdrgen_decode_void(xdr);
{% else %}
+{% if argument in structs %}
struct {{ argument }} *argp = rqstp->rq_argp;
+{% else %}
+ {{ argument }} *argp = rqstp->rq_argp;
+{% endif %}

return xdrgen_decode_{{ argument }}(xdr, argp);
{% endif %}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/program/encoder/result.j2 b/tools/net/sunrpc/xdrgen/templates/C/program/encoder/result.j2
index 6fc61a5d47b7..746592cfda56 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/program/encoder/result.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/program/encoder/result.j2
@@ -14,8 +14,14 @@ bool {{ program }}_svc_encode_{{ result }}(struct svc_rqst *rqstp, struct xdr_st
{% if result == 'void' %}
return xdrgen_encode_void(xdr);
{% else %}
+{% if result in structs %}
struct {{ result }} *resp = rqstp->rq_resp;

return xdrgen_encode_{{ result }}(xdr, resp);
+{% else %}
+ {{ result }} *resp = rqstp->rq_resp;
+
+ return xdrgen_encode_{{ result }}(xdr, *resp);
+{% endif %}
{% endif %}
}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/source_top/client.j2 b/tools/net/sunrpc/xdrgen/templates/C/source_top/client.j2
index c5518c519854..df3598c38b2c 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/source_top/client.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/source_top/client.j2
@@ -8,6 +8,5 @@
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/xdrgen/_defs.h>
#include <linux/sunrpc/xdrgen/_builtins.h>
-#include <linux/sunrpc/xdrgen/nlm4.h>

#include <linux/sunrpc/clnt.h>
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index 8c20361dd100..99d3897e046c 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -7,6 +7,8 @@ srctree := $(patsubst %/,%,$(dir $(CURDIR)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
endif

+RM ?= rm -f
+
LIBSUBCMD_DIR = $(srctree)/tools/lib/subcmd/
ifneq ($(OUTPUT),)
LIBSUBCMD_OUTPUT = $(abspath $(OUTPUT))/libsubcmd
diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index 0ce251b8d466..a7d54dfd3c68 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -950,9 +950,11 @@ int isolate_cpus(struct isst_id *id, int mask_size, cpu_set_t *cpu_mask, int lev
ret = write(fd, "member", strlen("member"));
if (ret == -1) {
printf("Can't update to member\n");
+ close(fd);
return ret;
}

+ close(fd);
return 0;
}

diff --git a/tools/spi/.gitignore b/tools/spi/.gitignore
index 14ddba3d2195..038261b34ed8 100644
--- a/tools/spi/.gitignore
+++ b/tools/spi/.gitignore
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
spidev_fdx
spidev_test
+include/
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index e59b2bbf8d92..591e7e77f89b 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -805,7 +805,7 @@ $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT)

# Include find_bit.c to compile xskxceiver.
EXTRA_SRC := $(TOOLSDIR)/lib/find_bit.c
-$(OUTPUT)/xskxceiver: $(EXTRA_SRC) xskxceiver.c xskxceiver.h $(OUTPUT)/network_helpers.o $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT)
+$(OUTPUT)/xskxceiver: $(EXTRA_SRC) test_xsk.c test_xsk.h xskxceiver.c xskxceiver.h $(OUTPUT)/network_helpers.o $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT)
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@

diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
index 2a9a30650350..65b4512967e7 100644
--- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c
+++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
@@ -153,6 +153,26 @@ static void test_check_mtu_run_tc(struct test_check_mtu *skel,
ASSERT_EQ(mtu_result, mtu_expect, "MTU-compare-user");
}

+static void test_chk_segs_flag(struct test_check_mtu *skel, __u32 mtu)
+{
+ int err, prog_fd = bpf_program__fd(skel->progs.tc_chk_segs_flag);
+ struct __sk_buff skb = {
+ .gso_size = 10,
+ };
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .ctx_in = &skb,
+ .ctx_size_in = sizeof(skb),
+ );
+
+ /* Lower the mtu to test the BPF_MTU_CHK_SEGS */
+ SYS_NOFAIL("ip link set dev lo mtu 10");
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ SYS_NOFAIL("ip link set dev lo mtu %u", mtu);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, BPF_OK, "retval");
+}

static void test_check_mtu_tc(__u32 mtu, __u32 ifindex)
{
@@ -177,11 +197,12 @@ static void test_check_mtu_tc(__u32 mtu, __u32 ifindex)
test_check_mtu_run_tc(skel, skel->progs.tc_minus_delta, mtu);
test_check_mtu_run_tc(skel, skel->progs.tc_input_len, mtu);
test_check_mtu_run_tc(skel, skel->progs.tc_input_len_exceed, mtu);
+ test_chk_segs_flag(skel, mtu);
cleanup:
test_check_mtu__destroy(skel);
}

-void serial_test_check_mtu(void)
+void test_ns_check_mtu(void)
{
int mtu_lo;

diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
index c9efdd2a5b18..c93718dafd9b 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
@@ -74,11 +74,20 @@ static void test_stacktrace_ips_kprobe_multi(bool retprobe)

load_kallsyms();

- check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 4,
- ksym_get_addr("bpf_testmod_stacktrace_test_3"),
- ksym_get_addr("bpf_testmod_stacktrace_test_2"),
- ksym_get_addr("bpf_testmod_stacktrace_test_1"),
- ksym_get_addr("bpf_testmod_test_read"));
+ if (retprobe) {
+ check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 4,
+ ksym_get_addr("bpf_testmod_stacktrace_test_3"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_2"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_1"),
+ ksym_get_addr("bpf_testmod_test_read"));
+ } else {
+ check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 5,
+ ksym_get_addr("bpf_testmod_stacktrace_test"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_3"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_2"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_1"),
+ ksym_get_addr("bpf_testmod_test_read"));
+ }

cleanup:
stacktrace_ips__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/wq.c b/tools/testing/selftests/bpf/prog_tests/wq.c
index 99e438fe12ac..15ac8e6d1745 100644
--- a/tools/testing/selftests/bpf/prog_tests/wq.c
+++ b/tools/testing/selftests/bpf/prog_tests/wq.c
@@ -16,12 +16,12 @@ void serial_test_wq(void)
/* re-run the success test to check if the timer was actually executed */

wq_skel = wq__open_and_load();
- if (!ASSERT_OK_PTR(wq_skel, "wq_skel_load"))
+ if (!ASSERT_OK_PTR(wq_skel, "wq__open_and_load"))
return;

err = wq__attach(wq_skel);
if (!ASSERT_OK(err, "wq_attach"))
- return;
+ goto clean_up;

prog_fd = bpf_program__fd(wq_skel->progs.test_syscall_array_sleepable);
err = bpf_prog_test_run_opts(prog_fd, &topts);
@@ -31,6 +31,7 @@ void serial_test_wq(void)
usleep(50); /* 10 usecs should be enough, but give it extra */

ASSERT_EQ(wq_skel->bss->ok_sleepable, (1 << 1), "ok_sleepable");
+clean_up:
wq__destroy(wq_skel);
}

diff --git a/tools/testing/selftests/bpf/progs/test_check_mtu.c b/tools/testing/selftests/bpf/progs/test_check_mtu.c
index 2ec1de11a3ae..7b6b2b342c1d 100644
--- a/tools/testing/selftests/bpf/progs/test_check_mtu.c
+++ b/tools/testing/selftests/bpf/progs/test_check_mtu.c
@@ -7,6 +7,7 @@

#include <stddef.h>
#include <stdint.h>
+#include <errno.h>

char _license[] SEC("license") = "GPL";

@@ -288,3 +289,14 @@ int tc_input_len_exceed(struct __sk_buff *ctx)
global_bpf_mtu_xdp = mtu_len;
return retval;
}
+
+SEC("tc")
+int tc_chk_segs_flag(struct __sk_buff *ctx)
+{
+ __u32 mtu_len = 0;
+ int err;
+
+ err = bpf_check_mtu(ctx, GLOBAL_USER_IFINDEX, &mtu_len, 0, BPF_MTU_CHK_SEGS);
+
+ return err == -EINVAL ? BPF_OK : BPF_DROP;
+}
diff --git a/tools/testing/selftests/bpf/test_xsk.c b/tools/testing/selftests/bpf/test_xsk.c
new file mode 100644
index 000000000000..55d318c5c5e5
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xsk.c
@@ -0,0 +1,2422 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <bpf/bpf.h>
+#include <errno.h>
+#include <linux/bitmap.h>
+#include <linux/if_link.h>
+#include <linux/mman.h>
+#include <linux/netdev.h>
+#include <poll.h>
+#include <pthread.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include "network_helpers.h"
+#include "test_xsk.h"
+#include "xsk_xdp_common.h"
+#include "xsk_xdp_progs.skel.h"
+
+#define DEFAULT_BATCH_SIZE 64
+#define MIN_PKT_SIZE 64
+#define MAX_ETH_JUMBO_SIZE 9000
+#define MAX_INTERFACES 2
+#define MAX_TEARDOWN_ITER 10
+#define MAX_TX_BUDGET_DEFAULT 32
+#define PKT_DUMP_NB_TO_PRINT 16
+/* Just to align the data in the packet */
+#define PKT_HDR_SIZE (sizeof(struct ethhdr) + 2)
+#define POLL_TMOUT 1000
+#define THREAD_TMOUT 3
+#define UMEM_HEADROOM_TEST_SIZE 128
+#define XSK_DESC__INVALID_OPTION (0xffff)
+#define XSK_UMEM__INVALID_FRAME_SIZE (MAX_ETH_JUMBO_SIZE + 1)
+#define XSK_UMEM__LARGE_FRAME_SIZE (3 * 1024)
+#define XSK_UMEM__MAX_FRAME_SIZE (4 * 1024)
+
+static const u8 g_mac[ETH_ALEN] = {0x55, 0x44, 0x33, 0x22, 0x11, 0x00};
+
+bool opt_verbose;
+pthread_barrier_t barr;
+pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+int pkts_in_flight;
+
+/* The payload is a word consisting of a packet sequence number in the upper
+ * 16-bits and a intra packet data sequence number in the lower 16 bits. So the 3rd packet's
+ * 5th word of data will contain the number (2<<16) | 4 as they are numbered from 0.
+ */
+static void write_payload(void *dest, u32 pkt_nb, u32 start, u32 size)
+{
+ u32 *ptr = (u32 *)dest, i;
+
+ start /= sizeof(*ptr);
+ size /= sizeof(*ptr);
+ for (i = 0; i < size; i++)
+ ptr[i] = htonl(pkt_nb << 16 | (i + start));
+}
+
+static void gen_eth_hdr(struct xsk_socket_info *xsk, struct ethhdr *eth_hdr)
+{
+ memcpy(eth_hdr->h_dest, xsk->dst_mac, ETH_ALEN);
+ memcpy(eth_hdr->h_source, xsk->src_mac, ETH_ALEN);
+ eth_hdr->h_proto = htons(ETH_P_LOOPBACK);
+}
+
+static bool is_umem_valid(struct ifobject *ifobj)
+{
+ return !!ifobj->umem->umem;
+}
+
+static u32 mode_to_xdp_flags(enum test_mode mode)
+{
+ return (mode == TEST_MODE_SKB) ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE;
+}
+
+static u64 umem_size(struct xsk_umem_info *umem)
+{
+ return umem->num_frames * umem->frame_size;
+}
+
+int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem, void *buffer,
+ u64 size)
+{
+ struct xsk_umem_config cfg = {
+ .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+ .frame_size = umem->frame_size,
+ .frame_headroom = umem->frame_headroom,
+ .flags = XSK_UMEM__DEFAULT_FLAGS
+ };
+ int ret;
+
+ if (umem->fill_size)
+ cfg.fill_size = umem->fill_size;
+
+ if (umem->comp_size)
+ cfg.comp_size = umem->comp_size;
+
+ if (umem->unaligned_mode)
+ cfg.flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG;
+
+ ret = xsk_umem__create(&umem->umem, buffer, size,
+ &umem->fq, &umem->cq, &cfg);
+ if (ret)
+ return ret;
+
+ umem->buffer = buffer;
+ if (ifobj->shared_umem && ifobj->rx_on) {
+ umem->base_addr = umem_size(umem);
+ umem->next_buffer = umem_size(umem);
+ }
+
+ return 0;
+}
+
+static u64 umem_alloc_buffer(struct xsk_umem_info *umem)
+{
+ u64 addr;
+
+ addr = umem->next_buffer;
+ umem->next_buffer += umem->frame_size;
+ if (umem->next_buffer >= umem->base_addr + umem_size(umem))
+ umem->next_buffer = umem->base_addr;
+
+ return addr;
+}
+
+static void umem_reset_alloc(struct xsk_umem_info *umem)
+{
+ umem->next_buffer = 0;
+}
+
+static void enable_busy_poll(struct xsk_socket_info *xsk)
+{
+ int sock_opt;
+
+ sock_opt = 1;
+ if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_PREFER_BUSY_POLL,
+ (void *)&sock_opt, sizeof(sock_opt)) < 0)
+ exit_with_error(errno);
+
+ sock_opt = 20;
+ if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL,
+ (void *)&sock_opt, sizeof(sock_opt)) < 0)
+ exit_with_error(errno);
+
+ sock_opt = xsk->batch_size;
+ if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL_BUDGET,
+ (void *)&sock_opt, sizeof(sock_opt)) < 0)
+ exit_with_error(errno);
+}
+
+int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem,
+ struct ifobject *ifobject, bool shared)
+{
+ struct xsk_socket_config cfg = {};
+ struct xsk_ring_cons *rxr;
+ struct xsk_ring_prod *txr;
+
+ xsk->umem = umem;
+ cfg.rx_size = xsk->rxqsize;
+ cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+ cfg.bind_flags = ifobject->bind_flags;
+ if (shared)
+ cfg.bind_flags |= XDP_SHARED_UMEM;
+ if (ifobject->mtu > MAX_ETH_PKT_SIZE)
+ cfg.bind_flags |= XDP_USE_SG;
+ if (umem->comp_size)
+ cfg.tx_size = umem->comp_size;
+ if (umem->fill_size)
+ cfg.rx_size = umem->fill_size;
+
+ txr = ifobject->tx_on ? &xsk->tx : NULL;
+ rxr = ifobject->rx_on ? &xsk->rx : NULL;
+ return xsk_socket__create(&xsk->xsk, ifobject->ifindex, 0, umem->umem, rxr, txr, &cfg);
+}
+
+#define MAX_SKB_FRAGS_PATH "/proc/sys/net/core/max_skb_frags"
+static unsigned int get_max_skb_frags(void)
+{
+ unsigned int max_skb_frags = 0;
+ FILE *file;
+
+ file = fopen(MAX_SKB_FRAGS_PATH, "r");
+ if (!file) {
+ ksft_print_msg("Error opening %s\n", MAX_SKB_FRAGS_PATH);
+ return 0;
+ }
+
+ if (fscanf(file, "%u", &max_skb_frags) != 1)
+ ksft_print_msg("Error reading %s\n", MAX_SKB_FRAGS_PATH);
+
+ fclose(file);
+ return max_skb_frags;
+}
+
+static int set_ring_size(struct ifobject *ifobj)
+{
+ int ret;
+ u32 ctr = 0;
+
+ while (ctr++ < SOCK_RECONF_CTR) {
+ ret = set_hw_ring_size(ifobj->ifname, &ifobj->ring);
+ if (!ret)
+ break;
+
+ /* Retry if it fails */
+ if (ctr >= SOCK_RECONF_CTR || errno != EBUSY)
+ return -errno;
+
+ usleep(USLEEP_MAX);
+ }
+
+ return ret;
+}
+
+int hw_ring_size_reset(struct ifobject *ifobj)
+{
+ ifobj->ring.tx_pending = ifobj->set_ring.default_tx;
+ ifobj->ring.rx_pending = ifobj->set_ring.default_rx;
+ return set_ring_size(ifobj);
+}
+
+static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
+ struct ifobject *ifobj_rx)
+{
+ u32 i, j;
+
+ for (i = 0; i < MAX_INTERFACES; i++) {
+ struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
+
+ ifobj->xsk = &ifobj->xsk_arr[0];
+ ifobj->use_poll = false;
+ ifobj->use_fill_ring = true;
+ ifobj->release_rx = true;
+ ifobj->validation_func = NULL;
+ ifobj->use_metadata = false;
+
+ if (i == 0) {
+ ifobj->rx_on = false;
+ ifobj->tx_on = true;
+ } else {
+ ifobj->rx_on = true;
+ ifobj->tx_on = false;
+ }
+
+ memset(ifobj->umem, 0, sizeof(*ifobj->umem));
+ ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS;
+ ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
+
+ for (j = 0; j < MAX_SOCKETS; j++) {
+ memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j]));
+ ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+ ifobj->xsk_arr[j].batch_size = DEFAULT_BATCH_SIZE;
+ if (i == 0)
+ ifobj->xsk_arr[j].pkt_stream = test->tx_pkt_stream_default;
+ else
+ ifobj->xsk_arr[j].pkt_stream = test->rx_pkt_stream_default;
+
+ memcpy(ifobj->xsk_arr[j].src_mac, g_mac, ETH_ALEN);
+ memcpy(ifobj->xsk_arr[j].dst_mac, g_mac, ETH_ALEN);
+ ifobj->xsk_arr[j].src_mac[5] += ((j * 2) + 0);
+ ifobj->xsk_arr[j].dst_mac[5] += ((j * 2) + 1);
+ }
+ }
+
+ if (ifobj_tx->hw_ring_size_supp)
+ hw_ring_size_reset(ifobj_tx);
+
+ test->ifobj_tx = ifobj_tx;
+ test->ifobj_rx = ifobj_rx;
+ test->current_step = 0;
+ test->total_steps = 1;
+ test->nb_sockets = 1;
+ test->fail = false;
+ test->set_ring = false;
+ test->adjust_tail = false;
+ test->adjust_tail_support = false;
+ test->mtu = MAX_ETH_PKT_SIZE;
+ test->xdp_prog_rx = ifobj_rx->xdp_progs->progs.xsk_def_prog;
+ test->xskmap_rx = ifobj_rx->xdp_progs->maps.xsk;
+ test->xdp_prog_tx = ifobj_tx->xdp_progs->progs.xsk_def_prog;
+ test->xskmap_tx = ifobj_tx->xdp_progs->maps.xsk;
+}
+
+void test_init(struct test_spec *test, struct ifobject *ifobj_tx,
+ struct ifobject *ifobj_rx, enum test_mode mode,
+ const struct test_spec *test_to_run)
+{
+ struct pkt_stream *tx_pkt_stream;
+ struct pkt_stream *rx_pkt_stream;
+ u32 i;
+
+ tx_pkt_stream = test->tx_pkt_stream_default;
+ rx_pkt_stream = test->rx_pkt_stream_default;
+ memset(test, 0, sizeof(*test));
+ test->tx_pkt_stream_default = tx_pkt_stream;
+ test->rx_pkt_stream_default = rx_pkt_stream;
+
+ for (i = 0; i < MAX_INTERFACES; i++) {
+ struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
+
+ ifobj->bind_flags = XDP_USE_NEED_WAKEUP;
+ if (mode == TEST_MODE_ZC)
+ ifobj->bind_flags |= XDP_ZEROCOPY;
+ else
+ ifobj->bind_flags |= XDP_COPY;
+ }
+
+ memcpy(test->name, test_to_run->name, MAX_TEST_NAME_SIZE);
+ test->test_func = test_to_run->test_func;
+ test->mode = mode;
+ __test_spec_init(test, ifobj_tx, ifobj_rx);
+}
+
+static void test_spec_reset(struct test_spec *test)
+{
+ __test_spec_init(test, test->ifobj_tx, test->ifobj_rx);
+}
+
+static void test_spec_set_xdp_prog(struct test_spec *test, struct bpf_program *xdp_prog_rx,
+ struct bpf_program *xdp_prog_tx, struct bpf_map *xskmap_rx,
+ struct bpf_map *xskmap_tx)
+{
+ test->xdp_prog_rx = xdp_prog_rx;
+ test->xdp_prog_tx = xdp_prog_tx;
+ test->xskmap_rx = xskmap_rx;
+ test->xskmap_tx = xskmap_tx;
+}
+
+static int test_spec_set_mtu(struct test_spec *test, int mtu)
+{
+ int err;
+
+ if (test->ifobj_rx->mtu != mtu) {
+ err = xsk_set_mtu(test->ifobj_rx->ifindex, mtu);
+ if (err)
+ return err;
+ test->ifobj_rx->mtu = mtu;
+ }
+ if (test->ifobj_tx->mtu != mtu) {
+ err = xsk_set_mtu(test->ifobj_tx->ifindex, mtu);
+ if (err)
+ return err;
+ test->ifobj_tx->mtu = mtu;
+ }
+
+ return 0;
+}
+
+void pkt_stream_reset(struct pkt_stream *pkt_stream)
+{
+ if (pkt_stream) {
+ pkt_stream->current_pkt_nb = 0;
+ pkt_stream->nb_rx_pkts = 0;
+ }
+}
+
+static struct pkt *pkt_stream_get_next_tx_pkt(struct pkt_stream *pkt_stream)
+{
+ if (pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts)
+ return NULL;
+
+ return &pkt_stream->pkts[pkt_stream->current_pkt_nb++];
+}
+
+static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream, u32 *pkts_sent)
+{
+ while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) {
+ (*pkts_sent)++;
+ if (pkt_stream->pkts[pkt_stream->current_pkt_nb].valid)
+ return &pkt_stream->pkts[pkt_stream->current_pkt_nb++];
+ pkt_stream->current_pkt_nb++;
+ }
+ return NULL;
+}
+
+void pkt_stream_delete(struct pkt_stream *pkt_stream)
+{
+ free(pkt_stream->pkts);
+ free(pkt_stream);
+}
+
+void pkt_stream_restore_default(struct test_spec *test)
+{
+ struct pkt_stream *tx_pkt_stream = test->ifobj_tx->xsk->pkt_stream;
+ struct pkt_stream *rx_pkt_stream = test->ifobj_rx->xsk->pkt_stream;
+
+ if (tx_pkt_stream != test->tx_pkt_stream_default) {
+ pkt_stream_delete(test->ifobj_tx->xsk->pkt_stream);
+ test->ifobj_tx->xsk->pkt_stream = test->tx_pkt_stream_default;
+ }
+
+ if (rx_pkt_stream != test->rx_pkt_stream_default) {
+ pkt_stream_delete(test->ifobj_rx->xsk->pkt_stream);
+ test->ifobj_rx->xsk->pkt_stream = test->rx_pkt_stream_default;
+ }
+}
+
+static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts)
+{
+ struct pkt_stream *pkt_stream;
+
+ pkt_stream = calloc(1, sizeof(*pkt_stream));
+ if (!pkt_stream)
+ return NULL;
+
+ pkt_stream->pkts = calloc(nb_pkts, sizeof(*pkt_stream->pkts));
+ if (!pkt_stream->pkts) {
+ free(pkt_stream);
+ return NULL;
+ }
+
+ pkt_stream->nb_pkts = nb_pkts;
+ return pkt_stream;
+}
+
+static u32 pkt_nb_frags(u32 frame_size, struct pkt_stream *pkt_stream, struct pkt *pkt)
+{
+ u32 nb_frags = 1, next_frag;
+
+ if (!pkt)
+ return 1;
+
+ if (!pkt_stream->verbatim) {
+ if (!pkt->valid || !pkt->len)
+ return 1;
+ return ceil_u32(pkt->len, frame_size);
+ }
+
+ /* Search for the end of the packet in verbatim mode */
+ if (!pkt_continues(pkt->options) || !pkt->valid)
+ return nb_frags;
+
+ next_frag = pkt_stream->current_pkt_nb;
+ pkt++;
+ while (next_frag++ < pkt_stream->nb_pkts) {
+ nb_frags++;
+ if (!pkt_continues(pkt->options) || !pkt->valid)
+ break;
+ pkt++;
+ }
+ return nb_frags;
+}
+
+static bool set_pkt_valid(int offset, u32 len)
+{
+ return len <= MAX_ETH_JUMBO_SIZE;
+}
+
+static void pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len)
+{
+ pkt->offset = offset;
+ pkt->len = len;
+ pkt->valid = set_pkt_valid(offset, len);
+}
+
+static void pkt_stream_pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len)
+{
+ bool prev_pkt_valid = pkt->valid;
+
+ pkt_set(pkt_stream, pkt, offset, len);
+ pkt_stream->nb_valid_entries += pkt->valid - prev_pkt_valid;
+}
+
+static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len)
+{
+ return ceil_u32(len, umem->frame_size) * umem->frame_size;
+}
+
+static struct pkt_stream *__pkt_stream_generate(u32 nb_pkts, u32 pkt_len, u32 nb_start, u32 nb_off)
+{
+ struct pkt_stream *pkt_stream;
+ u32 i;
+
+ pkt_stream = __pkt_stream_alloc(nb_pkts);
+ if (!pkt_stream)
+ exit_with_error(ENOMEM);
+
+ pkt_stream->nb_pkts = nb_pkts;
+ pkt_stream->max_pkt_len = pkt_len;
+ for (i = 0; i < nb_pkts; i++) {
+ struct pkt *pkt = &pkt_stream->pkts[i];
+
+ pkt_stream_pkt_set(pkt_stream, pkt, 0, pkt_len);
+ pkt->pkt_nb = nb_start + i * nb_off;
+ }
+
+ return pkt_stream;
+}
+
+struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len)
+{
+ return __pkt_stream_generate(nb_pkts, pkt_len, 0, 1);
+}
+
+static struct pkt_stream *pkt_stream_clone(struct pkt_stream *pkt_stream)
+{
+ return pkt_stream_generate(pkt_stream->nb_pkts, pkt_stream->pkts[0].len);
+}
+
+static void pkt_stream_replace_ifobject(struct ifobject *ifobj, u32 nb_pkts, u32 pkt_len)
+{
+ ifobj->xsk->pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
+}
+
+static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len)
+{
+ pkt_stream_replace_ifobject(test->ifobj_tx, nb_pkts, pkt_len);
+ pkt_stream_replace_ifobject(test->ifobj_rx, nb_pkts, pkt_len);
+}
+
+static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len,
+ int offset)
+{
+ struct pkt_stream *pkt_stream;
+ u32 i;
+
+ pkt_stream = pkt_stream_clone(ifobj->xsk->pkt_stream);
+ for (i = 1; i < ifobj->xsk->pkt_stream->nb_pkts; i += 2)
+ pkt_stream_pkt_set(pkt_stream, &pkt_stream->pkts[i], offset, pkt_len);
+
+ ifobj->xsk->pkt_stream = pkt_stream;
+}
+
+static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset)
+{
+ __pkt_stream_replace_half(test->ifobj_tx, pkt_len, offset);
+ __pkt_stream_replace_half(test->ifobj_rx, pkt_len, offset);
+}
+
+static void pkt_stream_receive_half(struct test_spec *test)
+{
+ struct pkt_stream *pkt_stream = test->ifobj_tx->xsk->pkt_stream;
+ u32 i;
+
+ test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(pkt_stream->nb_pkts,
+ pkt_stream->pkts[0].len);
+ pkt_stream = test->ifobj_rx->xsk->pkt_stream;
+ for (i = 1; i < pkt_stream->nb_pkts; i += 2)
+ pkt_stream->pkts[i].valid = false;
+
+ pkt_stream->nb_valid_entries /= 2;
+}
+
+static void pkt_stream_even_odd_sequence(struct test_spec *test)
+{
+ struct pkt_stream *pkt_stream;
+ u32 i;
+
+ for (i = 0; i < test->nb_sockets; i++) {
+ pkt_stream = test->ifobj_tx->xsk_arr[i].pkt_stream;
+ pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2,
+ pkt_stream->pkts[0].len, i, 2);
+ test->ifobj_tx->xsk_arr[i].pkt_stream = pkt_stream;
+
+ pkt_stream = test->ifobj_rx->xsk_arr[i].pkt_stream;
+ pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2,
+ pkt_stream->pkts[0].len, i, 2);
+ test->ifobj_rx->xsk_arr[i].pkt_stream = pkt_stream;
+ }
+}
+
+static u64 pkt_get_addr(struct pkt *pkt, struct xsk_umem_info *umem)
+{
+ if (!pkt->valid)
+ return pkt->offset;
+ return pkt->offset + umem_alloc_buffer(umem);
+}
+
+static void pkt_stream_cancel(struct pkt_stream *pkt_stream)
+{
+ pkt_stream->current_pkt_nb--;
+}
+
+static void pkt_generate(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, u64 addr, u32 len,
+ u32 pkt_nb, u32 bytes_written)
+{
+ void *data = xsk_umem__get_data(umem->buffer, addr);
+
+ if (len < MIN_PKT_SIZE)
+ return;
+
+ if (!bytes_written) {
+ gen_eth_hdr(xsk, data);
+
+ len -= PKT_HDR_SIZE;
+ data += PKT_HDR_SIZE;
+ } else {
+ bytes_written -= PKT_HDR_SIZE;
+ }
+
+ write_payload(data, pkt_nb, bytes_written, len);
+}
+
+static struct pkt_stream *__pkt_stream_generate_custom(struct ifobject *ifobj, struct pkt *frames,
+ u32 nb_frames, bool verbatim)
+{
+ u32 i, len = 0, pkt_nb = 0, payload = 0;
+ struct pkt_stream *pkt_stream;
+
+ pkt_stream = __pkt_stream_alloc(nb_frames);
+ if (!pkt_stream)
+ exit_with_error(ENOMEM);
+
+ for (i = 0; i < nb_frames; i++) {
+ struct pkt *pkt = &pkt_stream->pkts[pkt_nb];
+ struct pkt *frame = &frames[i];
+
+ pkt->offset = frame->offset;
+ if (verbatim) {
+ *pkt = *frame;
+ pkt->pkt_nb = payload;
+ if (!frame->valid || !pkt_continues(frame->options))
+ payload++;
+ } else {
+ if (frame->valid)
+ len += frame->len;
+ if (frame->valid && pkt_continues(frame->options))
+ continue;
+
+ pkt->pkt_nb = pkt_nb;
+ pkt->len = len;
+ pkt->valid = frame->valid;
+ pkt->options = 0;
+
+ len = 0;
+ }
+
+ print_verbose("offset: %d len: %u valid: %u options: %u pkt_nb: %u\n",
+ pkt->offset, pkt->len, pkt->valid, pkt->options, pkt->pkt_nb);
+
+ if (pkt->valid && pkt->len > pkt_stream->max_pkt_len)
+ pkt_stream->max_pkt_len = pkt->len;
+
+ if (pkt->valid)
+ pkt_stream->nb_valid_entries++;
+
+ pkt_nb++;
+ }
+
+ pkt_stream->nb_pkts = pkt_nb;
+ pkt_stream->verbatim = verbatim;
+ return pkt_stream;
+}
+
+static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, u32 nb_pkts)
+{
+ struct pkt_stream *pkt_stream;
+
+ pkt_stream = __pkt_stream_generate_custom(test->ifobj_tx, pkts, nb_pkts, true);
+ test->ifobj_tx->xsk->pkt_stream = pkt_stream;
+
+ pkt_stream = __pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts, false);
+ test->ifobj_rx->xsk->pkt_stream = pkt_stream;
+}
+
+static void pkt_print_data(u32 *data, u32 cnt)
+{
+ u32 i;
+
+ for (i = 0; i < cnt; i++) {
+ u32 seqnum, pkt_nb;
+
+ seqnum = ntohl(*data) & 0xffff;
+ pkt_nb = ntohl(*data) >> 16;
+ ksft_print_msg("%u:%u ", pkt_nb, seqnum);
+ data++;
+ }
+}
+
+static void pkt_dump(void *pkt, u32 len, bool eth_header)
+{
+ struct ethhdr *ethhdr = pkt;
+ u32 i, *data;
+
+ if (eth_header) {
+ /*extract L2 frame */
+ ksft_print_msg("DEBUG>> L2: dst mac: ");
+ for (i = 0; i < ETH_ALEN; i++)
+ ksft_print_msg("%02X", ethhdr->h_dest[i]);
+
+ ksft_print_msg("\nDEBUG>> L2: src mac: ");
+ for (i = 0; i < ETH_ALEN; i++)
+ ksft_print_msg("%02X", ethhdr->h_source[i]);
+
+ data = pkt + PKT_HDR_SIZE;
+ } else {
+ data = pkt;
+ }
+
+ /*extract L5 frame */
+ ksft_print_msg("\nDEBUG>> L5: seqnum: ");
+ pkt_print_data(data, PKT_DUMP_NB_TO_PRINT);
+ ksft_print_msg("....");
+ if (len > PKT_DUMP_NB_TO_PRINT * sizeof(u32)) {
+ ksft_print_msg("\n.... ");
+ pkt_print_data(data + len / sizeof(u32) - PKT_DUMP_NB_TO_PRINT,
+ PKT_DUMP_NB_TO_PRINT);
+ }
+ ksft_print_msg("\n---------------------------------------\n");
+}
+
+static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr)
+{
+ u32 headroom = umem->unaligned_mode ? 0 : umem->frame_headroom;
+ u32 offset = addr % umem->frame_size, expected_offset;
+ int pkt_offset = pkt->valid ? pkt->offset : 0;
+
+ if (!umem->unaligned_mode)
+ pkt_offset = 0;
+
+ expected_offset = (pkt_offset + headroom + XDP_PACKET_HEADROOM) % umem->frame_size;
+
+ if (offset == expected_offset)
+ return true;
+
+ ksft_print_msg("[%s] expected [%u], got [%u]\n", __func__, expected_offset, offset);
+ return false;
+}
+
+static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr)
+{
+ void *data = xsk_umem__get_data(buffer, addr);
+ struct xdp_info *meta = data - sizeof(struct xdp_info);
+
+ if (meta->count != pkt->pkt_nb) {
+ ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%llu]\n",
+ __func__, pkt->pkt_nb,
+ (unsigned long long)meta->count);
+ return false;
+ }
+
+ return true;
+}
+
+static bool is_adjust_tail_supported(struct xsk_xdp_progs *skel_rx)
+{
+ struct bpf_map *data_map;
+ int adjust_value = 0;
+ int key = 0;
+ int ret;
+
+ data_map = bpf_object__find_map_by_name(skel_rx->obj, "xsk_xdp_.bss");
+ if (!data_map || !bpf_map__is_internal(data_map)) {
+ ksft_print_msg("Error: could not find bss section of XDP program\n");
+ exit_with_error(errno);
+ }
+
+ ret = bpf_map_lookup_elem(bpf_map__fd(data_map), &key, &adjust_value);
+ if (ret) {
+ ksft_print_msg("Error: bpf_map_lookup_elem failed with error %d\n", ret);
+ exit_with_error(errno);
+ }
+
+ /* Set the 'adjust_value' variable to -EOPNOTSUPP in the XDP program if the adjust_tail
+ * helper is not supported. Skip the adjust_tail test case in this scenario.
+ */
+ return adjust_value != -EOPNOTSUPP;
+}
+
+static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 expected_pkt_nb,
+ u32 bytes_processed)
+{
+ u32 seqnum, pkt_nb, *pkt_data, words_to_end, expected_seqnum;
+ void *data = xsk_umem__get_data(umem->buffer, addr);
+
+ addr -= umem->base_addr;
+
+ if (addr >= umem->num_frames * umem->frame_size ||
+ addr + len > umem->num_frames * umem->frame_size) {
+ ksft_print_msg("Frag invalid addr: %llx len: %u\n",
+ (unsigned long long)addr, len);
+ return false;
+ }
+ if (!umem->unaligned_mode && addr % umem->frame_size + len > umem->frame_size) {
+ ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n",
+ (unsigned long long)addr, len);
+ return false;
+ }
+
+ pkt_data = data;
+ if (!bytes_processed) {
+ pkt_data += PKT_HDR_SIZE / sizeof(*pkt_data);
+ len -= PKT_HDR_SIZE;
+ } else {
+ bytes_processed -= PKT_HDR_SIZE;
+ }
+
+ expected_seqnum = bytes_processed / sizeof(*pkt_data);
+ seqnum = ntohl(*pkt_data) & 0xffff;
+ pkt_nb = ntohl(*pkt_data) >> 16;
+
+ if (expected_pkt_nb != pkt_nb) {
+ ksft_print_msg("[%s] expected pkt_nb [%u], got pkt_nb [%u]\n",
+ __func__, expected_pkt_nb, pkt_nb);
+ goto error;
+ }
+ if (expected_seqnum != seqnum) {
+ ksft_print_msg("[%s] expected seqnum at start [%u], got seqnum [%u]\n",
+ __func__, expected_seqnum, seqnum);
+ goto error;
+ }
+
+ words_to_end = len / sizeof(*pkt_data) - 1;
+ pkt_data += words_to_end;
+ seqnum = ntohl(*pkt_data) & 0xffff;
+ expected_seqnum += words_to_end;
+ if (expected_seqnum != seqnum) {
+ ksft_print_msg("[%s] expected seqnum at end [%u], got seqnum [%u]\n",
+ __func__, expected_seqnum, seqnum);
+ goto error;
+ }
+
+ return true;
+
+error:
+ pkt_dump(data, len, !bytes_processed);
+ return false;
+}
+
+static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
+{
+ if (pkt->len != len) {
+ ksft_print_msg("[%s] expected packet length [%d], got length [%d]\n",
+ __func__, pkt->len, len);
+ pkt_dump(xsk_umem__get_data(buffer, addr), len, true);
+ return false;
+ }
+
+ return true;
+}
+
+static u32 load_value(u32 *counter)
+{
+ return __atomic_load_n(counter, __ATOMIC_ACQUIRE);
+}
+
+static bool kick_tx_with_check(struct xsk_socket_info *xsk, int *ret)
+{
+ u32 max_budget = MAX_TX_BUDGET_DEFAULT;
+ u32 cons, ready_to_send;
+ int delta;
+
+ cons = load_value(xsk->tx.consumer);
+ ready_to_send = load_value(xsk->tx.producer) - cons;
+ *ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
+
+ delta = load_value(xsk->tx.consumer) - cons;
+ /* By default, xsk should consume exact @max_budget descs at one
+ * send in this case where hitting the max budget limit in while
+ * loop is triggered in __xsk_generic_xmit(). Please make sure that
+ * the number of descs to be sent is larger than @max_budget, or
+ * else the tx.consumer will be updated in xskq_cons_peek_desc()
+ * in time which hides the issue we try to verify.
+ */
+ if (ready_to_send > max_budget && delta != max_budget)
+ return false;
+
+ return true;
+}
+
+int kick_tx(struct xsk_socket_info *xsk)
+{
+ int ret;
+
+ if (xsk->check_consumer) {
+ if (!kick_tx_with_check(xsk, &ret))
+ return TEST_FAILURE;
+ } else {
+ ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
+ }
+ if (ret >= 0)
+ return TEST_PASS;
+ if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) {
+ usleep(100);
+ return TEST_PASS;
+ }
+ return TEST_FAILURE;
+}
+
+int kick_rx(struct xsk_socket_info *xsk)
+{
+ int ret;
+
+ ret = recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL);
+ if (ret < 0)
+ return TEST_FAILURE;
+
+ return TEST_PASS;
+}
+
+static int complete_pkts(struct xsk_socket_info *xsk, int batch_size)
+{
+ unsigned int rcvd;
+ u32 idx;
+ int ret;
+
+ if (xsk_ring_prod__needs_wakeup(&xsk->tx)) {
+ ret = kick_tx(xsk);
+ if (ret)
+ return TEST_FAILURE;
+ }
+
+ rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
+ if (rcvd) {
+ if (rcvd > xsk->outstanding_tx) {
+ u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1);
+
+ ksft_print_msg("[%s] Too many packets completed\n", __func__);
+ ksft_print_msg("Last completion address: %llx\n",
+ (unsigned long long)addr);
+ return TEST_FAILURE;
+ }
+
+ xsk_ring_cons__release(&xsk->umem->cq, rcvd);
+ xsk->outstanding_tx -= rcvd;
+ }
+
+ return TEST_PASS;
+}
+
+static int __receive_pkts(struct test_spec *test, struct xsk_socket_info *xsk)
+{
+ u32 frags_processed = 0, nb_frags = 0, pkt_len = 0;
+ u32 idx_rx = 0, idx_fq = 0, rcvd, pkts_sent = 0;
+ struct pkt_stream *pkt_stream = xsk->pkt_stream;
+ struct ifobject *ifobj = test->ifobj_rx;
+ struct xsk_umem_info *umem = xsk->umem;
+ struct pollfd fds = { };
+ struct pkt *pkt;
+ u64 first_addr = 0;
+ int ret;
+
+ fds.fd = xsk_socket__fd(xsk->xsk);
+ fds.events = POLLIN;
+
+ ret = kick_rx(xsk);
+ if (ret)
+ return TEST_FAILURE;
+
+ if (ifobj->use_poll) {
+ ret = poll(&fds, 1, POLL_TMOUT);
+ if (ret < 0)
+ return TEST_FAILURE;
+
+ if (!ret) {
+ if (!is_umem_valid(test->ifobj_tx))
+ return TEST_PASS;
+
+ ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__);
+ return TEST_CONTINUE;
+ }
+
+ if (!(fds.revents & POLLIN))
+ return TEST_CONTINUE;
+ }
+
+ rcvd = xsk_ring_cons__peek(&xsk->rx, xsk->batch_size, &idx_rx);
+ if (!rcvd)
+ return TEST_CONTINUE;
+
+ if (ifobj->use_fill_ring) {
+ ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
+ while (ret != rcvd) {
+ if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
+ ret = poll(&fds, 1, POLL_TMOUT);
+ if (ret < 0)
+ return TEST_FAILURE;
+ }
+ ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
+ }
+ }
+
+ while (frags_processed < rcvd) {
+ const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
+ u64 addr = desc->addr, orig;
+
+ orig = xsk_umem__extract_addr(addr);
+ addr = xsk_umem__add_offset_to_addr(addr);
+
+ if (!nb_frags) {
+ pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent);
+ if (!pkt) {
+ ksft_print_msg("[%s] received too many packets addr: %lx len %u\n",
+ __func__, addr, desc->len);
+ return TEST_FAILURE;
+ }
+ }
+
+ print_verbose("Rx: addr: %lx len: %u options: %u pkt_nb: %u valid: %u\n",
+ addr, desc->len, desc->options, pkt->pkt_nb, pkt->valid);
+
+ if (!is_frag_valid(umem, addr, desc->len, pkt->pkt_nb, pkt_len) ||
+ !is_offset_correct(umem, pkt, addr) || (ifobj->use_metadata &&
+ !is_metadata_correct(pkt, umem->buffer, addr)))
+ return TEST_FAILURE;
+
+ if (!nb_frags++)
+ first_addr = addr;
+ frags_processed++;
+ pkt_len += desc->len;
+ if (ifobj->use_fill_ring)
+ *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig;
+
+ if (pkt_continues(desc->options))
+ continue;
+
+ /* The complete packet has been received */
+ if (!is_pkt_valid(pkt, umem->buffer, first_addr, pkt_len) ||
+ !is_offset_correct(umem, pkt, addr))
+ return TEST_FAILURE;
+
+ pkt_stream->nb_rx_pkts++;
+ nb_frags = 0;
+ pkt_len = 0;
+ }
+
+ if (nb_frags) {
+ /* In the middle of a packet. Start over from beginning of packet. */
+ idx_rx -= nb_frags;
+ xsk_ring_cons__cancel(&xsk->rx, nb_frags);
+ if (ifobj->use_fill_ring) {
+ idx_fq -= nb_frags;
+ xsk_ring_prod__cancel(&umem->fq, nb_frags);
+ }
+ frags_processed -= nb_frags;
+ pkt_stream_cancel(pkt_stream);
+ pkts_sent--;
+ }
+
+ if (ifobj->use_fill_ring)
+ xsk_ring_prod__submit(&umem->fq, frags_processed);
+ if (ifobj->release_rx)
+ xsk_ring_cons__release(&xsk->rx, frags_processed);
+
+ pthread_mutex_lock(&pacing_mutex);
+ pkts_in_flight -= pkts_sent;
+ pthread_mutex_unlock(&pacing_mutex);
+ pkts_sent = 0;
+
+ return TEST_CONTINUE;
+}
+
+bool all_packets_received(struct test_spec *test, struct xsk_socket_info *xsk, u32 sock_num,
+ unsigned long *bitmap)
+{
+ struct pkt_stream *pkt_stream = xsk->pkt_stream;
+
+ if (!pkt_stream) {
+ __set_bit(sock_num, bitmap);
+ return false;
+ }
+
+ if (pkt_stream->nb_rx_pkts == pkt_stream->nb_valid_entries) {
+ __set_bit(sock_num, bitmap);
+ if (bitmap_full(bitmap, test->nb_sockets))
+ return true;
+ }
+
+ return false;
+}
+
+static int receive_pkts(struct test_spec *test)
+{
+ struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0};
+ DECLARE_BITMAP(bitmap, test->nb_sockets);
+ struct xsk_socket_info *xsk;
+ u32 sock_num = 0;
+ int res, ret;
+
+ ret = gettimeofday(&tv_now, NULL);
+ if (ret)
+ exit_with_error(errno);
+
+ timeradd(&tv_now, &tv_timeout, &tv_end);
+
+ while (1) {
+ xsk = &test->ifobj_rx->xsk_arr[sock_num];
+
+ if ((all_packets_received(test, xsk, sock_num, bitmap)))
+ break;
+
+ res = __receive_pkts(test, xsk);
+ if (!(res == TEST_PASS || res == TEST_CONTINUE))
+ return res;
+
+ ret = gettimeofday(&tv_now, NULL);
+ if (ret)
+ exit_with_error(errno);
+
+ if (timercmp(&tv_now, &tv_end, >)) {
+ ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__);
+ return TEST_FAILURE;
+ }
+ sock_num = (sock_num + 1) % test->nb_sockets;
+ }
+
+ return TEST_PASS;
+}
+
+static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, bool timeout)
+{
+ u32 i, idx = 0, valid_pkts = 0, valid_frags = 0, buffer_len;
+ struct pkt_stream *pkt_stream = xsk->pkt_stream;
+ struct xsk_umem_info *umem = ifobject->umem;
+ bool use_poll = ifobject->use_poll;
+ struct pollfd fds = { };
+ int ret;
+
+ buffer_len = pkt_get_buffer_len(umem, pkt_stream->max_pkt_len);
+ /* pkts_in_flight might be negative if many invalid packets are sent */
+ if (pkts_in_flight >= (int)((umem_size(umem) - xsk->batch_size * buffer_len) /
+ buffer_len)) {
+ ret = kick_tx(xsk);
+ if (ret)
+ return TEST_FAILURE;
+ return TEST_CONTINUE;
+ }
+
+ fds.fd = xsk_socket__fd(xsk->xsk);
+ fds.events = POLLOUT;
+
+ while (xsk_ring_prod__reserve(&xsk->tx, xsk->batch_size, &idx) < xsk->batch_size) {
+ if (use_poll) {
+ ret = poll(&fds, 1, POLL_TMOUT);
+ if (timeout) {
+ if (ret < 0) {
+ ksft_print_msg("ERROR: [%s] Poll error %d\n",
+ __func__, errno);
+ return TEST_FAILURE;
+ }
+ if (ret == 0)
+ return TEST_PASS;
+ break;
+ }
+ if (ret <= 0) {
+ ksft_print_msg("ERROR: [%s] Poll error %d\n",
+ __func__, errno);
+ return TEST_FAILURE;
+ }
+ }
+
+ complete_pkts(xsk, xsk->batch_size);
+ }
+
+ for (i = 0; i < xsk->batch_size; i++) {
+ struct pkt *pkt = pkt_stream_get_next_tx_pkt(pkt_stream);
+ u32 nb_frags_left, nb_frags, bytes_written = 0;
+
+ if (!pkt)
+ break;
+
+ nb_frags = pkt_nb_frags(umem->frame_size, pkt_stream, pkt);
+ if (nb_frags > xsk->batch_size - i) {
+ pkt_stream_cancel(pkt_stream);
+ xsk_ring_prod__cancel(&xsk->tx, xsk->batch_size - i);
+ break;
+ }
+ nb_frags_left = nb_frags;
+
+ while (nb_frags_left--) {
+ struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
+
+ tx_desc->addr = pkt_get_addr(pkt, ifobject->umem);
+ if (pkt_stream->verbatim) {
+ tx_desc->len = pkt->len;
+ tx_desc->options = pkt->options;
+ } else if (nb_frags_left) {
+ tx_desc->len = umem->frame_size;
+ tx_desc->options = XDP_PKT_CONTD;
+ } else {
+ tx_desc->len = pkt->len - bytes_written;
+ tx_desc->options = 0;
+ }
+ if (pkt->valid)
+ pkt_generate(xsk, umem, tx_desc->addr, tx_desc->len, pkt->pkt_nb,
+ bytes_written);
+ bytes_written += tx_desc->len;
+
+ print_verbose("Tx addr: %llx len: %u options: %u pkt_nb: %u\n",
+ tx_desc->addr, tx_desc->len, tx_desc->options, pkt->pkt_nb);
+
+ if (nb_frags_left) {
+ i++;
+ if (pkt_stream->verbatim)
+ pkt = pkt_stream_get_next_tx_pkt(pkt_stream);
+ }
+ }
+
+ if (pkt && pkt->valid) {
+ valid_pkts++;
+ valid_frags += nb_frags;
+ }
+ }
+
+ pthread_mutex_lock(&pacing_mutex);
+ pkts_in_flight += valid_pkts;
+ pthread_mutex_unlock(&pacing_mutex);
+
+ xsk_ring_prod__submit(&xsk->tx, i);
+ xsk->outstanding_tx += valid_frags;
+
+ if (use_poll) {
+ ret = poll(&fds, 1, POLL_TMOUT);
+ if (ret <= 0) {
+ if (ret == 0 && timeout)
+ return TEST_PASS;
+
+ ksft_print_msg("ERROR: [%s] Poll error %d\n", __func__, ret);
+ return TEST_FAILURE;
+ }
+ }
+
+ if (!timeout) {
+ if (complete_pkts(xsk, i))
+ return TEST_FAILURE;
+
+ usleep(10);
+ return TEST_PASS;
+ }
+
+ return TEST_CONTINUE;
+}
+
+static int wait_for_tx_completion(struct xsk_socket_info *xsk)
+{
+ struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0};
+ int ret;
+
+ ret = gettimeofday(&tv_now, NULL);
+ if (ret)
+ exit_with_error(errno);
+ timeradd(&tv_now, &tv_timeout, &tv_end);
+
+ while (xsk->outstanding_tx) {
+ ret = gettimeofday(&tv_now, NULL);
+ if (ret)
+ exit_with_error(errno);
+ if (timercmp(&tv_now, &tv_end, >)) {
+ ksft_print_msg("ERROR: [%s] Transmission loop timed out\n", __func__);
+ return TEST_FAILURE;
+ }
+
+ complete_pkts(xsk, xsk->batch_size);
+ }
+
+ return TEST_PASS;
+}
+
+bool all_packets_sent(struct test_spec *test, unsigned long *bitmap)
+{
+ return bitmap_full(bitmap, test->nb_sockets);
+}
+
+static int send_pkts(struct test_spec *test, struct ifobject *ifobject)
+{
+ bool timeout = !is_umem_valid(test->ifobj_rx);
+ DECLARE_BITMAP(bitmap, test->nb_sockets);
+ u32 i, ret;
+
+ while (!(all_packets_sent(test, bitmap))) {
+ for (i = 0; i < test->nb_sockets; i++) {
+ struct pkt_stream *pkt_stream;
+
+ pkt_stream = ifobject->xsk_arr[i].pkt_stream;
+ if (!pkt_stream || pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts) {
+ __set_bit(i, bitmap);
+ continue;
+ }
+ ret = __send_pkts(ifobject, &ifobject->xsk_arr[i], timeout);
+ if (ret == TEST_CONTINUE && !test->fail)
+ continue;
+
+ if ((ret || test->fail) && !timeout)
+ return TEST_FAILURE;
+
+ if (ret == TEST_PASS && timeout)
+ return ret;
+
+ ret = wait_for_tx_completion(&ifobject->xsk_arr[i]);
+ if (ret)
+ return TEST_FAILURE;
+ }
+ }
+
+ return TEST_PASS;
+}
+
+static int get_xsk_stats(struct xsk_socket *xsk, struct xdp_statistics *stats)
+{
+ int fd = xsk_socket__fd(xsk), err;
+ socklen_t optlen, expected_len;
+
+ optlen = sizeof(*stats);
+ err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, stats, &optlen);
+ if (err) {
+ ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+ __func__, -err, strerror(-err));
+ return TEST_FAILURE;
+ }
+
+ expected_len = sizeof(struct xdp_statistics);
+ if (optlen != expected_len) {
+ ksft_print_msg("[%s] getsockopt optlen error. Expected: %u got: %u\n",
+ __func__, expected_len, optlen);
+ return TEST_FAILURE;
+ }
+
+ return TEST_PASS;
+}
+
+static int validate_rx_dropped(struct ifobject *ifobject)
+{
+ struct xsk_socket *xsk = ifobject->xsk->xsk;
+ struct xdp_statistics stats;
+ int err;
+
+ err = kick_rx(ifobject->xsk);
+ if (err)
+ return TEST_FAILURE;
+
+ err = get_xsk_stats(xsk, &stats);
+ if (err)
+ return TEST_FAILURE;
+
+ /* The receiver calls getsockopt after receiving the last (valid)
+ * packet which is not the final packet sent in this test (valid and
+ * invalid packets are sent in alternating fashion with the final
+ * packet being invalid). Since the last packet may or may not have
+ * been dropped already, both outcomes must be allowed.
+ */
+ if (stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 ||
+ stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 - 1)
+ return TEST_PASS;
+
+ return TEST_FAILURE;
+}
+
+static int validate_rx_full(struct ifobject *ifobject)
+{
+ struct xsk_socket *xsk = ifobject->xsk->xsk;
+ struct xdp_statistics stats;
+ int err;
+
+ usleep(1000);
+ err = kick_rx(ifobject->xsk);
+ if (err)
+ return TEST_FAILURE;
+
+ err = get_xsk_stats(xsk, &stats);
+ if (err)
+ return TEST_FAILURE;
+
+ if (stats.rx_ring_full)
+ return TEST_PASS;
+
+ return TEST_FAILURE;
+}
+
+static int validate_fill_empty(struct ifobject *ifobject)
+{
+ struct xsk_socket *xsk = ifobject->xsk->xsk;
+ struct xdp_statistics stats;
+ int err;
+
+ usleep(1000);
+ err = kick_rx(ifobject->xsk);
+ if (err)
+ return TEST_FAILURE;
+
+ err = get_xsk_stats(xsk, &stats);
+ if (err)
+ return TEST_FAILURE;
+
+ if (stats.rx_fill_ring_empty_descs)
+ return TEST_PASS;
+
+ return TEST_FAILURE;
+}
+
+static int validate_tx_invalid_descs(struct ifobject *ifobject)
+{
+ struct xsk_socket *xsk = ifobject->xsk->xsk;
+ int fd = xsk_socket__fd(xsk);
+ struct xdp_statistics stats;
+ socklen_t optlen;
+ int err;
+
+ optlen = sizeof(stats);
+ err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
+ if (err) {
+ ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+ __func__, -err, strerror(-err));
+ return TEST_FAILURE;
+ }
+
+ if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) {
+ ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%llu] expected [%u]\n",
+ __func__,
+ (unsigned long long)stats.tx_invalid_descs,
+ ifobject->xsk->pkt_stream->nb_pkts);
+ return TEST_FAILURE;
+ }
+
+ return TEST_PASS;
+}
+
+static void xsk_configure(struct test_spec *test, struct ifobject *ifobject,
+ struct xsk_umem_info *umem, bool tx)
+{
+ int i, ret;
+
+ for (i = 0; i < test->nb_sockets; i++) {
+ bool shared = (ifobject->shared_umem && tx) ? true : !!i;
+ u32 ctr = 0;
+
+ while (ctr++ < SOCK_RECONF_CTR) {
+ ret = xsk_configure_socket(&ifobject->xsk_arr[i], umem,
+ ifobject, shared);
+ if (!ret)
+ break;
+
+ /* Retry if it fails as xsk_socket__create() is asynchronous */
+ if (ctr >= SOCK_RECONF_CTR)
+ exit_with_error(-ret);
+ usleep(USLEEP_MAX);
+ }
+ if (ifobject->busy_poll)
+ enable_busy_poll(&ifobject->xsk_arr[i]);
+ }
+}
+
+static void thread_common_ops_tx(struct test_spec *test, struct ifobject *ifobject)
+{
+ xsk_configure(test, ifobject, test->ifobj_rx->umem, true);
+ ifobject->xsk = &ifobject->xsk_arr[0];
+ ifobject->xskmap = test->ifobj_rx->xskmap;
+ memcpy(ifobject->umem, test->ifobj_rx->umem, sizeof(struct xsk_umem_info));
+ ifobject->umem->base_addr = 0;
+}
+
+static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream,
+ bool fill_up)
+{
+ u32 rx_frame_size = umem->frame_size - XDP_PACKET_HEADROOM;
+ u32 idx = 0, filled = 0, buffers_to_fill, nb_pkts;
+ int ret;
+
+ if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
+ buffers_to_fill = umem->num_frames;
+ else
+ buffers_to_fill = umem->fill_size;
+
+ ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx);
+ if (ret != buffers_to_fill)
+ exit_with_error(ENOSPC);
+
+ while (filled < buffers_to_fill) {
+ struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &nb_pkts);
+ u64 addr;
+ u32 i;
+
+ for (i = 0; i < pkt_nb_frags(rx_frame_size, pkt_stream, pkt); i++) {
+ if (!pkt) {
+ if (!fill_up)
+ break;
+ addr = filled * umem->frame_size + umem->base_addr;
+ } else if (pkt->offset >= 0) {
+ addr = pkt->offset % umem->frame_size + umem_alloc_buffer(umem);
+ } else {
+ addr = pkt->offset + umem_alloc_buffer(umem);
+ }
+
+ *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr;
+ if (++filled >= buffers_to_fill)
+ break;
+ }
+ }
+ xsk_ring_prod__submit(&umem->fq, filled);
+ xsk_ring_prod__cancel(&umem->fq, buffers_to_fill - filled);
+
+ pkt_stream_reset(pkt_stream);
+ umem_reset_alloc(umem);
+}
+
+static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
+{
+ LIBBPF_OPTS(bpf_xdp_query_opts, opts);
+ int mmap_flags;
+ u64 umem_sz;
+ void *bufs;
+ int ret;
+ u32 i;
+
+ umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size;
+ mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+
+ if (ifobject->umem->unaligned_mode)
+ mmap_flags |= MAP_HUGETLB | MAP_HUGE_2MB;
+
+ if (ifobject->shared_umem)
+ umem_sz *= 2;
+
+ bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
+ if (bufs == MAP_FAILED)
+ exit_with_error(errno);
+
+ ret = xsk_configure_umem(ifobject, ifobject->umem, bufs, umem_sz);
+ if (ret)
+ exit_with_error(-ret);
+
+ xsk_configure(test, ifobject, ifobject->umem, false);
+
+ ifobject->xsk = &ifobject->xsk_arr[0];
+
+ if (!ifobject->rx_on)
+ return;
+
+ xsk_populate_fill_ring(ifobject->umem, ifobject->xsk->pkt_stream, ifobject->use_fill_ring);
+
+ for (i = 0; i < test->nb_sockets; i++) {
+ ifobject->xsk = &ifobject->xsk_arr[i];
+ ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, i);
+ if (ret)
+ exit_with_error(errno);
+ }
+}
+
+void *worker_testapp_validate_tx(void *arg)
+{
+ struct test_spec *test = (struct test_spec *)arg;
+ struct ifobject *ifobject = test->ifobj_tx;
+ int err;
+
+ if (test->current_step == 1) {
+ if (!ifobject->shared_umem)
+ thread_common_ops(test, ifobject);
+ else
+ thread_common_ops_tx(test, ifobject);
+ }
+
+ err = send_pkts(test, ifobject);
+
+ if (!err && ifobject->validation_func)
+ err = ifobject->validation_func(ifobject);
+ if (err)
+ test->fail = true;
+
+ pthread_exit(NULL);
+}
+
+void *worker_testapp_validate_rx(void *arg)
+{
+ struct test_spec *test = (struct test_spec *)arg;
+ struct ifobject *ifobject = test->ifobj_rx;
+ int err;
+
+ if (test->current_step == 1) {
+ thread_common_ops(test, ifobject);
+ } else {
+ xsk_clear_xskmap(ifobject->xskmap);
+ err = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, 0);
+ if (err) {
+ ksft_print_msg("Error: Failed to update xskmap, error %s\n",
+ strerror(-err));
+ exit_with_error(-err);
+ }
+ }
+
+ pthread_barrier_wait(&barr);
+
+ err = receive_pkts(test);
+
+ if (!err && ifobject->validation_func)
+ err = ifobject->validation_func(ifobject);
+
+ if (err) {
+ if (test->adjust_tail && !is_adjust_tail_supported(ifobject->xdp_progs))
+ test->adjust_tail_support = false;
+ else
+ test->fail = true;
+ }
+
+ pthread_exit(NULL);
+}
+
+static void testapp_clean_xsk_umem(struct ifobject *ifobj)
+{
+ u64 umem_sz = ifobj->umem->num_frames * ifobj->umem->frame_size;
+
+ if (ifobj->shared_umem)
+ umem_sz *= 2;
+
+ umem_sz = ceil_u64(umem_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE;
+ xsk_umem__delete(ifobj->umem->umem);
+ munmap(ifobj->umem->buffer, umem_sz);
+}
+
+static void handler(int signum)
+{
+ pthread_exit(NULL);
+}
+
+static bool xdp_prog_changed_rx(struct test_spec *test)
+{
+ struct ifobject *ifobj = test->ifobj_rx;
+
+ return ifobj->xdp_prog != test->xdp_prog_rx || ifobj->mode != test->mode;
+}
+
+static bool xdp_prog_changed_tx(struct test_spec *test)
+{
+ struct ifobject *ifobj = test->ifobj_tx;
+
+ return ifobj->xdp_prog != test->xdp_prog_tx || ifobj->mode != test->mode;
+}
+
+static void xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_prog,
+ struct bpf_map *xskmap, enum test_mode mode)
+{
+ int err;
+
+ xsk_detach_xdp_program(ifobj->ifindex, mode_to_xdp_flags(ifobj->mode));
+ err = xsk_attach_xdp_program(xdp_prog, ifobj->ifindex, mode_to_xdp_flags(mode));
+ if (err) {
+ ksft_print_msg("Error attaching XDP program\n");
+ exit_with_error(-err);
+ }
+
+ if (ifobj->mode != mode && (mode == TEST_MODE_DRV || mode == TEST_MODE_ZC))
+ if (!xsk_is_in_mode(ifobj->ifindex, XDP_FLAGS_DRV_MODE)) {
+ ksft_print_msg("ERROR: XDP prog not in DRV mode\n");
+ exit_with_error(EINVAL);
+ }
+
+ ifobj->xdp_prog = xdp_prog;
+ ifobj->xskmap = xskmap;
+ ifobj->mode = mode;
+}
+
+static void xsk_attach_xdp_progs(struct test_spec *test, struct ifobject *ifobj_rx,
+ struct ifobject *ifobj_tx)
+{
+ if (xdp_prog_changed_rx(test))
+ xsk_reattach_xdp(ifobj_rx, test->xdp_prog_rx, test->xskmap_rx, test->mode);
+
+ if (!ifobj_tx || ifobj_tx->shared_umem)
+ return;
+
+ if (xdp_prog_changed_tx(test))
+ xsk_reattach_xdp(ifobj_tx, test->xdp_prog_tx, test->xskmap_tx, test->mode);
+}
+
+static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *ifobj1,
+ struct ifobject *ifobj2)
+{
+ pthread_t t0, t1;
+ int err;
+
+ if (test->mtu > MAX_ETH_PKT_SIZE) {
+ if (test->mode == TEST_MODE_ZC && (!ifobj1->multi_buff_zc_supp ||
+ (ifobj2 && !ifobj2->multi_buff_zc_supp))) {
+ ksft_print_msg("Multi buffer for zero-copy not supported.\n");
+ return TEST_SKIP;
+ }
+ if (test->mode != TEST_MODE_ZC && (!ifobj1->multi_buff_supp ||
+ (ifobj2 && !ifobj2->multi_buff_supp))) {
+ ksft_print_msg("Multi buffer not supported.\n");
+ return TEST_SKIP;
+ }
+ }
+ err = test_spec_set_mtu(test, test->mtu);
+ if (err) {
+ ksft_print_msg("Error, could not set mtu.\n");
+ exit_with_error(err);
+ }
+
+ if (ifobj2) {
+ if (pthread_barrier_init(&barr, NULL, 2))
+ exit_with_error(errno);
+ pkt_stream_reset(ifobj2->xsk->pkt_stream);
+ }
+
+ test->current_step++;
+ pkt_stream_reset(ifobj1->xsk->pkt_stream);
+ pkts_in_flight = 0;
+
+ signal(SIGUSR1, handler);
+ /*Spawn RX thread */
+ pthread_create(&t0, NULL, ifobj1->func_ptr, test);
+
+ if (ifobj2) {
+ pthread_barrier_wait(&barr);
+ if (pthread_barrier_destroy(&barr))
+ exit_with_error(errno);
+
+ /*Spawn TX thread */
+ pthread_create(&t1, NULL, ifobj2->func_ptr, test);
+
+ pthread_join(t1, NULL);
+ }
+
+ if (!ifobj2)
+ pthread_kill(t0, SIGUSR1);
+ else
+ pthread_join(t0, NULL);
+
+ if (test->total_steps == test->current_step || test->fail) {
+ u32 i;
+
+ if (ifobj2)
+ for (i = 0; i < test->nb_sockets; i++)
+ xsk_socket__delete(ifobj2->xsk_arr[i].xsk);
+
+ for (i = 0; i < test->nb_sockets; i++)
+ xsk_socket__delete(ifobj1->xsk_arr[i].xsk);
+
+ testapp_clean_xsk_umem(ifobj1);
+ if (ifobj2 && !ifobj2->shared_umem)
+ testapp_clean_xsk_umem(ifobj2);
+ }
+
+ return !!test->fail;
+}
+
+static int testapp_validate_traffic(struct test_spec *test)
+{
+ struct ifobject *ifobj_rx = test->ifobj_rx;
+ struct ifobject *ifobj_tx = test->ifobj_tx;
+
+ if ((ifobj_rx->umem->unaligned_mode && !ifobj_rx->unaligned_supp) ||
+ (ifobj_tx->umem->unaligned_mode && !ifobj_tx->unaligned_supp)) {
+ ksft_print_msg("No huge pages present.\n");
+ return TEST_SKIP;
+ }
+
+ if (test->set_ring) {
+ if (ifobj_tx->hw_ring_size_supp) {
+ if (set_ring_size(ifobj_tx)) {
+ ksft_print_msg("Failed to change HW ring size.\n");
+ return TEST_FAILURE;
+ }
+ } else {
+ ksft_print_msg("Changing HW ring size not supported.\n");
+ return TEST_SKIP;
+ }
+ }
+
+ xsk_attach_xdp_progs(test, ifobj_rx, ifobj_tx);
+ return __testapp_validate_traffic(test, ifobj_rx, ifobj_tx);
+}
+
+static int testapp_validate_traffic_single_thread(struct test_spec *test, struct ifobject *ifobj)
+{
+ return __testapp_validate_traffic(test, ifobj, NULL);
+}
+
+int testapp_teardown(struct test_spec *test)
+{
+ int i;
+
+ for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
+ if (testapp_validate_traffic(test))
+ return TEST_FAILURE;
+ test_spec_reset(test);
+ }
+
+ return TEST_PASS;
+}
+
+static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2)
+{
+ thread_func_t tmp_func_ptr = (*ifobj1)->func_ptr;
+ struct ifobject *tmp_ifobj = (*ifobj1);
+
+ (*ifobj1)->func_ptr = (*ifobj2)->func_ptr;
+ (*ifobj2)->func_ptr = tmp_func_ptr;
+
+ *ifobj1 = *ifobj2;
+ *ifobj2 = tmp_ifobj;
+}
+
+int testapp_bidirectional(struct test_spec *test)
+{
+ int res;
+
+ test->ifobj_tx->rx_on = true;
+ test->ifobj_rx->tx_on = true;
+ test->total_steps = 2;
+ if (testapp_validate_traffic(test))
+ return TEST_FAILURE;
+
+ print_verbose("Switching Tx/Rx direction\n");
+ swap_directions(&test->ifobj_rx, &test->ifobj_tx);
+ res = __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx);
+
+ swap_directions(&test->ifobj_rx, &test->ifobj_tx);
+ return res;
+}
+
+static int swap_xsk_resources(struct test_spec *test)
+{
+ int ret;
+
+ test->ifobj_tx->xsk_arr[0].pkt_stream = NULL;
+ test->ifobj_rx->xsk_arr[0].pkt_stream = NULL;
+ test->ifobj_tx->xsk_arr[1].pkt_stream = test->tx_pkt_stream_default;
+ test->ifobj_rx->xsk_arr[1].pkt_stream = test->rx_pkt_stream_default;
+ test->ifobj_tx->xsk = &test->ifobj_tx->xsk_arr[1];
+ test->ifobj_rx->xsk = &test->ifobj_rx->xsk_arr[1];
+
+ ret = xsk_update_xskmap(test->ifobj_rx->xskmap, test->ifobj_rx->xsk->xsk, 0);
+ if (ret)
+ return TEST_FAILURE;
+
+ return TEST_PASS;
+}
+
+int testapp_xdp_prog_cleanup(struct test_spec *test)
+{
+ test->total_steps = 2;
+ test->nb_sockets = 2;
+ if (testapp_validate_traffic(test))
+ return TEST_FAILURE;
+
+ if (swap_xsk_resources(test))
+ return TEST_FAILURE;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_headroom(struct test_spec *test)
+{
+ test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_stats_rx_dropped(struct test_spec *test)
+{
+ if (test->mode == TEST_MODE_ZC) {
+ ksft_print_msg("Can not run RX_DROPPED test for ZC mode\n");
+ return TEST_SKIP;
+ }
+
+ pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0);
+ test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size -
+ XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3;
+ pkt_stream_receive_half(test);
+ test->ifobj_rx->validation_func = validate_rx_dropped;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_stats_tx_invalid_descs(struct test_spec *test)
+{
+ pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0);
+ test->ifobj_tx->validation_func = validate_tx_invalid_descs;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_stats_rx_full(struct test_spec *test)
+{
+ pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE);
+ test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
+
+ test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS;
+ test->ifobj_rx->release_rx = false;
+ test->ifobj_rx->validation_func = validate_rx_full;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_stats_fill_empty(struct test_spec *test)
+{
+ pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE);
+ test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
+
+ test->ifobj_rx->use_fill_ring = false;
+ test->ifobj_rx->validation_func = validate_fill_empty;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_send_receive_unaligned(struct test_spec *test)
+{
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+ /* Let half of the packets straddle a 4K buffer boundary */
+ pkt_stream_replace_half(test, MIN_PKT_SIZE, -MIN_PKT_SIZE / 2);
+
+ return testapp_validate_traffic(test);
+}
+
+int testapp_send_receive_unaligned_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+ pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE);
+ return testapp_validate_traffic(test);
+}
+
+int testapp_single_pkt(struct test_spec *test)
+{
+ struct pkt pkts[] = {{0, MIN_PKT_SIZE, 0, true}};
+
+ pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
+ return testapp_validate_traffic(test);
+}
+
+int testapp_send_receive_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE);
+
+ return testapp_validate_traffic(test);
+}
+
+int testapp_invalid_desc_mb(struct test_spec *test)
+{
+ struct xsk_umem_info *umem = test->ifobj_tx->umem;
+ u64 umem_size = umem->num_frames * umem->frame_size;
+ struct pkt pkts[] = {
+ /* Valid packet for synch to start with */
+ {0, MIN_PKT_SIZE, 0, true, 0},
+ /* Zero frame len is not legal */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {0, 0, 0, false, 0},
+ /* Invalid address in the second frame */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {umem_size, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ /* Invalid len in the middle */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ /* Invalid options in the middle */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XSK_DESC__INVALID_OPTION},
+ /* Transmit 2 frags, receive 3 */
+ {0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, XDP_PKT_CONTD},
+ {0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, 0},
+ /* Middle frame crosses chunk boundary with small length */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {-MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false, 0},
+ /* Valid packet for synch so that something is received */
+ {0, MIN_PKT_SIZE, 0, true, 0}};
+
+ if (umem->unaligned_mode) {
+ /* Crossing a chunk boundary allowed */
+ pkts[12].valid = true;
+ pkts[13].valid = true;
+ }
+
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
+ return testapp_validate_traffic(test);
+}
+
+int testapp_invalid_desc(struct test_spec *test)
+{
+ struct xsk_umem_info *umem = test->ifobj_tx->umem;
+ u64 umem_size = umem->num_frames * umem->frame_size;
+ struct pkt pkts[] = {
+ /* Zero packet address allowed */
+ {0, MIN_PKT_SIZE, 0, true},
+ /* Allowed packet */
+ {0, MIN_PKT_SIZE, 0, true},
+ /* Straddling the start of umem */
+ {-2, MIN_PKT_SIZE, 0, false},
+ /* Packet too large */
+ {0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false},
+ /* Up to end of umem allowed */
+ {umem_size - MIN_PKT_SIZE - 2 * umem->frame_size, MIN_PKT_SIZE, 0, true},
+ /* After umem ends */
+ {umem_size, MIN_PKT_SIZE, 0, false},
+ /* Straddle the end of umem */
+ {umem_size - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false},
+ /* Straddle a 4K boundary */
+ {0x1000 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false},
+ /* Straddle a 2K boundary */
+ {0x800 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, true},
+ /* Valid packet for synch so that something is received */
+ {0, MIN_PKT_SIZE, 0, true}};
+
+ if (umem->unaligned_mode) {
+ /* Crossing a page boundary allowed */
+ pkts[7].valid = true;
+ }
+ if (umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) {
+ /* Crossing a 2K frame size boundary not allowed */
+ pkts[8].valid = false;
+ }
+
+ if (test->ifobj_tx->shared_umem) {
+ pkts[4].offset += umem_size;
+ pkts[5].offset += umem_size;
+ pkts[6].offset += umem_size;
+ }
+
+ pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
+ return testapp_validate_traffic(test);
+}
+
+int testapp_xdp_drop(struct test_spec *test)
+{
+ struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+ struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
+
+ test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_drop, skel_tx->progs.xsk_xdp_drop,
+ skel_rx->maps.xsk, skel_tx->maps.xsk);
+
+ pkt_stream_receive_half(test);
+ return testapp_validate_traffic(test);
+}
+
+int testapp_xdp_metadata_copy(struct test_spec *test)
+{
+ struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+ struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
+
+ test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_populate_metadata,
+ skel_tx->progs.xsk_xdp_populate_metadata,
+ skel_rx->maps.xsk, skel_tx->maps.xsk);
+ test->ifobj_rx->use_metadata = true;
+
+ skel_rx->bss->count = 0;
+
+ return testapp_validate_traffic(test);
+}
+
+int testapp_xdp_shared_umem(struct test_spec *test)
+{
+ struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+ struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
+
+ test->total_steps = 1;
+ test->nb_sockets = 2;
+
+ test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_shared_umem,
+ skel_tx->progs.xsk_xdp_shared_umem,
+ skel_rx->maps.xsk, skel_tx->maps.xsk);
+
+ pkt_stream_even_odd_sequence(test);
+
+ return testapp_validate_traffic(test);
+}
+
+int testapp_poll_txq_tmout(struct test_spec *test)
+{
+ test->ifobj_tx->use_poll = true;
+	/* create invalid frame by setting umem frame_size and pkt length equal to 2048 */
+ test->ifobj_tx->umem->frame_size = 2048;
+ pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048);
+ return testapp_validate_traffic_single_thread(test, test->ifobj_tx);
+}
+
+int testapp_poll_rxq_tmout(struct test_spec *test)
+{
+ test->ifobj_rx->use_poll = true;
+ return testapp_validate_traffic_single_thread(test, test->ifobj_rx);
+}
+
+int testapp_too_many_frags(struct test_spec *test)
+{
+ struct pkt *pkts;
+ u32 max_frags, i;
+ int ret;
+
+ if (test->mode == TEST_MODE_ZC) {
+ max_frags = test->ifobj_tx->xdp_zc_max_segs;
+ } else {
+ max_frags = get_max_skb_frags();
+ if (!max_frags) {
+ ksft_print_msg("Can't get MAX_SKB_FRAGS from system, using default (17)\n");
+ max_frags = 17;
+ }
+ max_frags += 1;
+ }
+
+ pkts = calloc(2 * max_frags + 2, sizeof(struct pkt));
+ if (!pkts)
+ return TEST_FAILURE;
+
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+
+ /* Valid packet for synch */
+ pkts[0].len = MIN_PKT_SIZE;
+ pkts[0].valid = true;
+
+ /* One valid packet with the max amount of frags */
+ for (i = 1; i < max_frags + 1; i++) {
+ pkts[i].len = MIN_PKT_SIZE;
+ pkts[i].options = XDP_PKT_CONTD;
+ pkts[i].valid = true;
+ }
+ pkts[max_frags].options = 0;
+
+ /* An invalid packet with the max amount of frags but signals packet
+ * continues on the last frag
+ */
+ for (i = max_frags + 1; i < 2 * max_frags + 1; i++) {
+ pkts[i].len = MIN_PKT_SIZE;
+ pkts[i].options = XDP_PKT_CONTD;
+ pkts[i].valid = false;
+ }
+
+ /* Valid packet for synch */
+ pkts[2 * max_frags + 1].len = MIN_PKT_SIZE;
+ pkts[2 * max_frags + 1].valid = true;
+
+ pkt_stream_generate_custom(test, pkts, 2 * max_frags + 2);
+ ret = testapp_validate_traffic(test);
+
+ free(pkts);
+ return ret;
+}
+
+static int xsk_load_xdp_programs(struct ifobject *ifobj)
+{
+ ifobj->xdp_progs = xsk_xdp_progs__open_and_load();
+ if (libbpf_get_error(ifobj->xdp_progs))
+ return libbpf_get_error(ifobj->xdp_progs);
+
+ return 0;
+}
+
+/* Simple test */
+static bool hugepages_present(void)
+{
+ size_t mmap_sz = 2 * DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE;
+ void *bufs;
+
+ bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, MAP_HUGE_2MB);
+ if (bufs == MAP_FAILED)
+ return false;
+
+ mmap_sz = ceil_u64(mmap_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE;
+ munmap(bufs, mmap_sz);
+ return true;
+}
+
+void init_iface(struct ifobject *ifobj, thread_func_t func_ptr)
+{
+ LIBBPF_OPTS(bpf_xdp_query_opts, query_opts);
+ int err;
+
+ ifobj->func_ptr = func_ptr;
+
+ err = xsk_load_xdp_programs(ifobj);
+ if (err) {
+ ksft_print_msg("Error loading XDP program\n");
+ exit_with_error(err);
+ }
+
+ if (hugepages_present())
+ ifobj->unaligned_supp = true;
+
+ err = bpf_xdp_query(ifobj->ifindex, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (err) {
+ ksft_print_msg("Error querying XDP capabilities\n");
+ exit_with_error(-err);
+ }
+ if (query_opts.feature_flags & NETDEV_XDP_ACT_RX_SG)
+ ifobj->multi_buff_supp = true;
+ if (query_opts.feature_flags & NETDEV_XDP_ACT_XSK_ZEROCOPY) {
+ if (query_opts.xdp_zc_max_segs > 1) {
+ ifobj->multi_buff_zc_supp = true;
+ ifobj->xdp_zc_max_segs = query_opts.xdp_zc_max_segs;
+ } else {
+ ifobj->xdp_zc_max_segs = 0;
+ }
+ }
+}
+
+int testapp_send_receive(struct test_spec *test)
+{
+ return testapp_validate_traffic(test);
+}
+
+int testapp_send_receive_2k_frame(struct test_spec *test)
+{
+ test->ifobj_tx->umem->frame_size = 2048;
+ test->ifobj_rx->umem->frame_size = 2048;
+ pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE);
+ return testapp_validate_traffic(test);
+}
+
+int testapp_poll_rx(struct test_spec *test)
+{
+ test->ifobj_rx->use_poll = true;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_poll_tx(struct test_spec *test)
+{
+ test->ifobj_tx->use_poll = true;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_aligned_inv_desc(struct test_spec *test)
+{
+ return testapp_invalid_desc(test);
+}
+
+int testapp_aligned_inv_desc_2k_frame(struct test_spec *test)
+{
+ test->ifobj_tx->umem->frame_size = 2048;
+ test->ifobj_rx->umem->frame_size = 2048;
+ return testapp_invalid_desc(test);
+}
+
+int testapp_unaligned_inv_desc(struct test_spec *test)
+{
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+ return testapp_invalid_desc(test);
+}
+
+int testapp_unaligned_inv_desc_4001_frame(struct test_spec *test)
+{
+ u64 page_size, umem_size;
+
+ /* Odd frame size so the UMEM doesn't end near a page boundary. */
+ test->ifobj_tx->umem->frame_size = 4001;
+ test->ifobj_rx->umem->frame_size = 4001;
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+	/* This test exists to test descriptors that straddle the end of
+ * the UMEM but not a page.
+ */
+ page_size = sysconf(_SC_PAGESIZE);
+ umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size;
+ assert(umem_size % page_size > MIN_PKT_SIZE);
+ assert(umem_size % page_size < page_size - MIN_PKT_SIZE);
+
+ return testapp_invalid_desc(test);
+}
+
+int testapp_aligned_inv_desc_mb(struct test_spec *test)
+{
+ return testapp_invalid_desc_mb(test);
+}
+
+int testapp_unaligned_inv_desc_mb(struct test_spec *test)
+{
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+ return testapp_invalid_desc_mb(test);
+}
+
+int testapp_xdp_metadata(struct test_spec *test)
+{
+ return testapp_xdp_metadata_copy(test);
+}
+
+int testapp_xdp_metadata_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ return testapp_xdp_metadata_copy(test);
+}
+
+int testapp_hw_sw_min_ring_size(struct test_spec *test)
+{
+ int ret;
+
+ test->set_ring = true;
+ test->total_steps = 2;
+ test->ifobj_tx->ring.tx_pending = DEFAULT_BATCH_SIZE;
+ test->ifobj_tx->ring.rx_pending = DEFAULT_BATCH_SIZE * 2;
+ test->ifobj_tx->xsk->batch_size = 1;
+ test->ifobj_rx->xsk->batch_size = 1;
+ ret = testapp_validate_traffic(test);
+ if (ret)
+ return ret;
+
+ /* Set batch size to hw_ring_size - 1 */
+ test->ifobj_tx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1;
+ test->ifobj_rx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_hw_sw_max_ring_size(struct test_spec *test)
+{
+ u32 max_descs = XSK_RING_PROD__DEFAULT_NUM_DESCS * 4;
+ int ret;
+
+ test->set_ring = true;
+ test->total_steps = 2;
+ test->ifobj_tx->ring.tx_pending = test->ifobj_tx->ring.tx_max_pending;
+ test->ifobj_tx->ring.rx_pending = test->ifobj_tx->ring.rx_max_pending;
+ test->ifobj_rx->umem->num_frames = max_descs;
+ test->ifobj_rx->umem->fill_size = max_descs;
+ test->ifobj_rx->umem->comp_size = max_descs;
+ test->ifobj_tx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+ test->ifobj_rx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+
+ ret = testapp_validate_traffic(test);
+ if (ret)
+ return ret;
+
+ /* Set batch_size to 8152 for testing, as the ice HW ignores the 3 lowest bits when
+ * updating the Rx HW tail register.
+ */
+ test->ifobj_tx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8;
+ test->ifobj_rx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8;
+ pkt_stream_replace(test, max_descs, MIN_PKT_SIZE);
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_xdp_adjust_tail(struct test_spec *test, int adjust_value)
+{
+ struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+ struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
+
+ test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_adjust_tail,
+ skel_tx->progs.xsk_xdp_adjust_tail,
+ skel_rx->maps.xsk, skel_tx->maps.xsk);
+
+ skel_rx->bss->adjust_value = adjust_value;
+
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_adjust_tail(struct test_spec *test, u32 value, u32 pkt_len)
+{
+ int ret;
+
+ test->adjust_tail_support = true;
+ test->adjust_tail = true;
+ test->total_steps = 1;
+
+ pkt_stream_replace_ifobject(test->ifobj_tx, DEFAULT_BATCH_SIZE, pkt_len);
+ pkt_stream_replace_ifobject(test->ifobj_rx, DEFAULT_BATCH_SIZE, pkt_len + value);
+
+ ret = testapp_xdp_adjust_tail(test, value);
+ if (ret)
+ return ret;
+
+ if (!test->adjust_tail_support) {
+ ksft_print_msg("%s %sResize pkt with bpf_xdp_adjust_tail() not supported\n",
+ mode_string(test), busy_poll_string(test));
+ return TEST_SKIP;
+ }
+
+ return 0;
+}
+
+int testapp_adjust_tail_shrink(struct test_spec *test)
+{
+ /* Shrink by 4 bytes for testing purpose */
+ return testapp_adjust_tail(test, -4, MIN_PKT_SIZE * 2);
+}
+
+int testapp_adjust_tail_shrink_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ /* Shrink by the frag size */
+ return testapp_adjust_tail(test, -XSK_UMEM__MAX_FRAME_SIZE, XSK_UMEM__LARGE_FRAME_SIZE * 2);
+}
+
+int testapp_adjust_tail_grow(struct test_spec *test)
+{
+ /* Grow by 4 bytes for testing purpose */
+ return testapp_adjust_tail(test, 4, MIN_PKT_SIZE * 2);
+}
+
+int testapp_adjust_tail_grow_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+	/* Grow by (frag_size - last_frag_size) - 1 to stay inside the last fragment */
+ return testapp_adjust_tail(test, (XSK_UMEM__MAX_FRAME_SIZE / 2) - 1,
+ XSK_UMEM__LARGE_FRAME_SIZE * 2);
+}
+
+int testapp_tx_queue_consumer(struct test_spec *test)
+{
+ int nr_packets;
+
+ if (test->mode == TEST_MODE_ZC) {
+ ksft_print_msg("Can not run TX_QUEUE_CONSUMER test for ZC mode\n");
+ return TEST_SKIP;
+ }
+
+ nr_packets = MAX_TX_BUDGET_DEFAULT + 1;
+ pkt_stream_replace(test, nr_packets, MIN_PKT_SIZE);
+ test->ifobj_tx->xsk->batch_size = nr_packets;
+ test->ifobj_tx->xsk->check_consumer = true;
+
+ return testapp_validate_traffic(test);
+}
+
+struct ifobject *ifobject_create(void)
+{
+ struct ifobject *ifobj;
+
+ ifobj = calloc(1, sizeof(struct ifobject));
+ if (!ifobj)
+ return NULL;
+
+ ifobj->xsk_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->xsk_arr));
+ if (!ifobj->xsk_arr)
+ goto out_xsk_arr;
+
+ ifobj->umem = calloc(1, sizeof(*ifobj->umem));
+ if (!ifobj->umem)
+ goto out_umem;
+
+ return ifobj;
+
+out_umem:
+ free(ifobj->xsk_arr);
+out_xsk_arr:
+ free(ifobj);
+ return NULL;
+}
+
+void ifobject_delete(struct ifobject *ifobj)
+{
+ free(ifobj->umem);
+ free(ifobj->xsk_arr);
+ free(ifobj);
+}
diff --git a/tools/testing/selftests/bpf/test_xsk.h b/tools/testing/selftests/bpf/test_xsk.h
new file mode 100644
index 000000000000..fb546cab39fd
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xsk.h
@@ -0,0 +1,297 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef TEST_XSK_H_
+#define TEST_XSK_H_
+
+#include <linux/ethtool.h>
+#include <linux/if_xdp.h>
+
+#include "../kselftest.h"
+#include "xsk.h"
+
+#ifndef SO_PREFER_BUSY_POLL
+#define SO_PREFER_BUSY_POLL 69
+#endif
+
+#ifndef SO_BUSY_POLL_BUDGET
+#define SO_BUSY_POLL_BUDGET 70
+#endif
+
+#define TEST_PASS 0
+#define TEST_FAILURE -1
+#define TEST_CONTINUE 1
+#define TEST_SKIP 2
+
+#define DEFAULT_PKT_CNT (4 * 1024)
+#define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4)
+#define HUGEPAGE_SIZE (2 * 1024 * 1024)
+#define MIN_PKT_SIZE 64
+#define MAX_ETH_PKT_SIZE 1518
+#define MAX_INTERFACE_NAME_CHARS 16
+#define MAX_TEST_NAME_SIZE 48
+#define SOCK_RECONF_CTR 10
+#define USLEEP_MAX 10000
+
+extern bool opt_verbose;
+#define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
+
+static void __exit_with_error(int error, const char *file, const char *func, int line)
+{
+ ksft_test_result_fail("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error,
+ strerror(error));
+ ksft_exit_xfail();
+}
+#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
+
+static inline u32 ceil_u32(u32 a, u32 b)
+{
+ return (a + b - 1) / b;
+}
+
+static inline u64 ceil_u64(u64 a, u64 b)
+{
+ return (a + b - 1) / b;
+}
+
+/* Simple test */
+enum test_mode {
+ TEST_MODE_SKB,
+ TEST_MODE_DRV,
+ TEST_MODE_ZC,
+ TEST_MODE_ALL
+};
+
+struct ifobject;
+struct test_spec;
+typedef int (*validation_func_t)(struct ifobject *ifobj);
+typedef void *(*thread_func_t)(void *arg);
+typedef int (*test_func_t)(struct test_spec *test);
+
+struct xsk_socket_info {
+ struct xsk_ring_cons rx;
+ struct xsk_ring_prod tx;
+ struct xsk_umem_info *umem;
+ struct xsk_socket *xsk;
+ struct pkt_stream *pkt_stream;
+ u32 outstanding_tx;
+ u32 rxqsize;
+ u32 batch_size;
+ u8 dst_mac[ETH_ALEN];
+ u8 src_mac[ETH_ALEN];
+ bool check_consumer;
+};
+
+int kick_rx(struct xsk_socket_info *xsk);
+int kick_tx(struct xsk_socket_info *xsk);
+
+struct xsk_umem_info {
+ struct xsk_ring_prod fq;
+ struct xsk_ring_cons cq;
+ struct xsk_umem *umem;
+ u64 next_buffer;
+ u32 num_frames;
+ u32 frame_headroom;
+ void *buffer;
+ u32 frame_size;
+ u32 base_addr;
+ u32 fill_size;
+ u32 comp_size;
+ bool unaligned_mode;
+};
+
+struct set_hw_ring {
+ u32 default_tx;
+ u32 default_rx;
+};
+
+int hw_ring_size_reset(struct ifobject *ifobj);
+
+struct ifobject {
+ char ifname[MAX_INTERFACE_NAME_CHARS];
+ struct xsk_socket_info *xsk;
+ struct xsk_socket_info *xsk_arr;
+ struct xsk_umem_info *umem;
+ thread_func_t func_ptr;
+ validation_func_t validation_func;
+ struct xsk_xdp_progs *xdp_progs;
+ struct bpf_map *xskmap;
+ struct bpf_program *xdp_prog;
+ struct ethtool_ringparam ring;
+ struct set_hw_ring set_ring;
+ enum test_mode mode;
+ int ifindex;
+ int mtu;
+ u32 bind_flags;
+ u32 xdp_zc_max_segs;
+ bool tx_on;
+ bool rx_on;
+ bool use_poll;
+ bool busy_poll;
+ bool use_fill_ring;
+ bool release_rx;
+ bool shared_umem;
+ bool use_metadata;
+ bool unaligned_supp;
+ bool multi_buff_supp;
+ bool multi_buff_zc_supp;
+ bool hw_ring_size_supp;
+};
+struct ifobject *ifobject_create(void);
+void ifobject_delete(struct ifobject *ifobj);
+void init_iface(struct ifobject *ifobj, thread_func_t func_ptr);
+
+int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem, void *buffer, u64 size);
+int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem,
+ struct ifobject *ifobject, bool shared);
+
+
+struct pkt {
+ int offset;
+ u32 len;
+ u32 pkt_nb;
+ bool valid;
+ u16 options;
+};
+
+struct pkt_stream {
+ u32 nb_pkts;
+ u32 current_pkt_nb;
+ struct pkt *pkts;
+ u32 max_pkt_len;
+ u32 nb_rx_pkts;
+ u32 nb_valid_entries;
+ bool verbatim;
+};
+
+static inline bool pkt_continues(u32 options)
+{
+ return options & XDP_PKT_CONTD;
+}
+
+struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len);
+void pkt_stream_delete(struct pkt_stream *pkt_stream);
+void pkt_stream_reset(struct pkt_stream *pkt_stream);
+void pkt_stream_restore_default(struct test_spec *test);
+
+struct test_spec {
+ struct ifobject *ifobj_tx;
+ struct ifobject *ifobj_rx;
+ struct pkt_stream *tx_pkt_stream_default;
+ struct pkt_stream *rx_pkt_stream_default;
+ struct bpf_program *xdp_prog_rx;
+ struct bpf_program *xdp_prog_tx;
+ struct bpf_map *xskmap_rx;
+ struct bpf_map *xskmap_tx;
+ test_func_t test_func;
+ int mtu;
+ u16 total_steps;
+ u16 current_step;
+ u16 nb_sockets;
+ bool fail;
+ bool set_ring;
+ bool adjust_tail;
+ bool adjust_tail_support;
+ enum test_mode mode;
+ char name[MAX_TEST_NAME_SIZE];
+};
+
+#define busy_poll_string(test) (test)->ifobj_tx->busy_poll ? "BUSY-POLL " : ""
+static inline char *mode_string(struct test_spec *test)
+{
+ switch (test->mode) {
+ case TEST_MODE_SKB:
+ return "SKB";
+ case TEST_MODE_DRV:
+ return "DRV";
+ case TEST_MODE_ZC:
+ return "ZC";
+ default:
+ return "BOGUS";
+ }
+}
+
+void test_init(struct test_spec *test, struct ifobject *ifobj_tx,
+ struct ifobject *ifobj_rx, enum test_mode mode,
+ const struct test_spec *test_to_run);
+
+int testapp_adjust_tail_grow(struct test_spec *test);
+int testapp_adjust_tail_grow_mb(struct test_spec *test);
+int testapp_adjust_tail_shrink(struct test_spec *test);
+int testapp_adjust_tail_shrink_mb(struct test_spec *test);
+int testapp_aligned_inv_desc(struct test_spec *test);
+int testapp_aligned_inv_desc_2k_frame(struct test_spec *test);
+int testapp_aligned_inv_desc_mb(struct test_spec *test);
+int testapp_bidirectional(struct test_spec *test);
+int testapp_headroom(struct test_spec *test);
+int testapp_hw_sw_max_ring_size(struct test_spec *test);
+int testapp_hw_sw_min_ring_size(struct test_spec *test);
+int testapp_poll_rx(struct test_spec *test);
+int testapp_poll_rxq_tmout(struct test_spec *test);
+int testapp_poll_tx(struct test_spec *test);
+int testapp_poll_txq_tmout(struct test_spec *test);
+int testapp_send_receive(struct test_spec *test);
+int testapp_send_receive_2k_frame(struct test_spec *test);
+int testapp_send_receive_mb(struct test_spec *test);
+int testapp_send_receive_unaligned(struct test_spec *test);
+int testapp_send_receive_unaligned_mb(struct test_spec *test);
+int testapp_single_pkt(struct test_spec *test);
+int testapp_stats_fill_empty(struct test_spec *test);
+int testapp_stats_rx_dropped(struct test_spec *test);
+int testapp_stats_tx_invalid_descs(struct test_spec *test);
+int testapp_stats_rx_full(struct test_spec *test);
+int testapp_teardown(struct test_spec *test);
+int testapp_too_many_frags(struct test_spec *test);
+int testapp_tx_queue_consumer(struct test_spec *test);
+int testapp_unaligned_inv_desc(struct test_spec *test);
+int testapp_unaligned_inv_desc_4001_frame(struct test_spec *test);
+int testapp_unaligned_inv_desc_mb(struct test_spec *test);
+int testapp_xdp_drop(struct test_spec *test);
+int testapp_xdp_metadata(struct test_spec *test);
+int testapp_xdp_metadata_mb(struct test_spec *test);
+int testapp_xdp_prog_cleanup(struct test_spec *test);
+int testapp_xdp_shared_umem(struct test_spec *test);
+
+void *worker_testapp_validate_rx(void *arg);
+void *worker_testapp_validate_tx(void *arg);
+
+static const struct test_spec tests[] = {
+ {.name = "SEND_RECEIVE", .test_func = testapp_send_receive},
+ {.name = "SEND_RECEIVE_2K_FRAME", .test_func = testapp_send_receive_2k_frame},
+ {.name = "SEND_RECEIVE_SINGLE_PKT", .test_func = testapp_single_pkt},
+ {.name = "POLL_RX", .test_func = testapp_poll_rx},
+ {.name = "POLL_TX", .test_func = testapp_poll_tx},
+ {.name = "POLL_RXQ_FULL", .test_func = testapp_poll_rxq_tmout},
+ {.name = "POLL_TXQ_FULL", .test_func = testapp_poll_txq_tmout},
+ {.name = "SEND_RECEIVE_UNALIGNED", .test_func = testapp_send_receive_unaligned},
+ {.name = "ALIGNED_INV_DESC", .test_func = testapp_aligned_inv_desc},
+ {.name = "ALIGNED_INV_DESC_2K_FRAME_SIZE", .test_func = testapp_aligned_inv_desc_2k_frame},
+ {.name = "UNALIGNED_INV_DESC", .test_func = testapp_unaligned_inv_desc},
+ {.name = "UNALIGNED_INV_DESC_4001_FRAME_SIZE",
+ .test_func = testapp_unaligned_inv_desc_4001_frame},
+ {.name = "UMEM_HEADROOM", .test_func = testapp_headroom},
+ {.name = "TEARDOWN", .test_func = testapp_teardown},
+ {.name = "BIDIRECTIONAL", .test_func = testapp_bidirectional},
+ {.name = "STAT_RX_DROPPED", .test_func = testapp_stats_rx_dropped},
+ {.name = "STAT_TX_INVALID", .test_func = testapp_stats_tx_invalid_descs},
+ {.name = "STAT_RX_FULL", .test_func = testapp_stats_rx_full},
+ {.name = "STAT_FILL_EMPTY", .test_func = testapp_stats_fill_empty},
+ {.name = "XDP_PROG_CLEANUP", .test_func = testapp_xdp_prog_cleanup},
+ {.name = "XDP_DROP_HALF", .test_func = testapp_xdp_drop},
+ {.name = "XDP_SHARED_UMEM", .test_func = testapp_xdp_shared_umem},
+ {.name = "XDP_METADATA_COPY", .test_func = testapp_xdp_metadata},
+ {.name = "XDP_METADATA_COPY_MULTI_BUFF", .test_func = testapp_xdp_metadata_mb},
+ {.name = "SEND_RECEIVE_9K_PACKETS", .test_func = testapp_send_receive_mb},
+ {.name = "SEND_RECEIVE_UNALIGNED_9K_PACKETS",
+ .test_func = testapp_send_receive_unaligned_mb},
+ {.name = "ALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_aligned_inv_desc_mb},
+ {.name = "UNALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_unaligned_inv_desc_mb},
+ {.name = "TOO_MANY_FRAGS", .test_func = testapp_too_many_frags},
+ {.name = "HW_SW_MIN_RING_SIZE", .test_func = testapp_hw_sw_min_ring_size},
+ {.name = "HW_SW_MAX_RING_SIZE", .test_func = testapp_hw_sw_max_ring_size},
+ {.name = "XDP_ADJUST_TAIL_SHRINK", .test_func = testapp_adjust_tail_shrink},
+ {.name = "XDP_ADJUST_TAIL_SHRINK_MULTI_BUFF", .test_func = testapp_adjust_tail_shrink_mb},
+ {.name = "XDP_ADJUST_TAIL_GROW", .test_func = testapp_adjust_tail_grow},
+ {.name = "XDP_ADJUST_TAIL_GROW_MULTI_BUFF", .test_func = testapp_adjust_tail_grow_mb},
+ {.name = "TX_QUEUE_CONSUMER", .test_func = testapp_tx_queue_consumer},
+ };
+
+#endif /* TEST_XSK_H_ */
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index e962f133250c..1be1e353d40a 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -2580,7 +2580,7 @@ static void output_stats(const struct verif_stats *s, enum resfmt fmt, bool last
if (last && fmt == RESFMT_TABLE) {
output_header_underlines();
printf("Done. Processed %d files, %d programs. Skipped %d files, %d programs.\n",
- env.files_processed, env.files_skipped, env.progs_processed, env.progs_skipped);
+ env.files_processed, env.progs_processed, env.files_skipped, env.progs_skipped);
}
}

diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 352adc8df2d1..8e108e316269 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -74,31 +74,23 @@
#define _GNU_SOURCE
#include <assert.h>
#include <fcntl.h>
-#include <errno.h>
#include <getopt.h>
#include <linux/if_link.h>
#include <linux/if_ether.h>
#include <linux/mman.h>
#include <linux/netdev.h>
-#include <linux/bitmap.h>
#include <linux/ethtool.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <locale.h>
-#include <poll.h>
-#include <pthread.h>
-#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <libgen.h>
-#include <string.h>
#include <stddef.h>
#include <sys/mman.h>
-#include <sys/socket.h>
-#include <sys/time.h>
#include <sys/types.h>
-#include <unistd.h>

+#include "test_xsk.h"
#include "xsk_xdp_progs.skel.h"
#include "xsk.h"
#include "xskxceiver.h"
@@ -109,181 +101,12 @@

#include <network_helpers.h>

-#define MAX_TX_BUDGET_DEFAULT 32
-
-static bool opt_verbose;
static bool opt_print_tests;
static enum test_mode opt_mode = TEST_MODE_ALL;
static u32 opt_run_test = RUN_ALL_TESTS;

void test__fail(void) { /* for network_helpers.c */ }

-static void __exit_with_error(int error, const char *file, const char *func, int line)
-{
- ksft_test_result_fail("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error,
- strerror(error));
- ksft_exit_xfail();
-}
-
-#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
-#define busy_poll_string(test) (test)->ifobj_tx->busy_poll ? "BUSY-POLL " : ""
-static char *mode_string(struct test_spec *test)
-{
- switch (test->mode) {
- case TEST_MODE_SKB:
- return "SKB";
- case TEST_MODE_DRV:
- return "DRV";
- case TEST_MODE_ZC:
- return "ZC";
- default:
- return "BOGUS";
- }
-}
-
-static void report_failure(struct test_spec *test)
-{
- if (test->fail)
- return;
-
- ksft_test_result_fail("FAIL: %s %s%s\n", mode_string(test), busy_poll_string(test),
- test->name);
- test->fail = true;
-}
-
-/* The payload is a word consisting of a packet sequence number in the upper
- * 16-bits and a intra packet data sequence number in the lower 16 bits. So the 3rd packet's
- * 5th word of data will contain the number (2<<16) | 4 as they are numbered from 0.
- */
-static void write_payload(void *dest, u32 pkt_nb, u32 start, u32 size)
-{
- u32 *ptr = (u32 *)dest, i;
-
- start /= sizeof(*ptr);
- size /= sizeof(*ptr);
- for (i = 0; i < size; i++)
- ptr[i] = htonl(pkt_nb << 16 | (i + start));
-}
-
-static void gen_eth_hdr(struct xsk_socket_info *xsk, struct ethhdr *eth_hdr)
-{
- memcpy(eth_hdr->h_dest, xsk->dst_mac, ETH_ALEN);
- memcpy(eth_hdr->h_source, xsk->src_mac, ETH_ALEN);
- eth_hdr->h_proto = htons(ETH_P_LOOPBACK);
-}
-
-static bool is_umem_valid(struct ifobject *ifobj)
-{
- return !!ifobj->umem->umem;
-}
-
-static u32 mode_to_xdp_flags(enum test_mode mode)
-{
- return (mode == TEST_MODE_SKB) ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE;
-}
-
-static u64 umem_size(struct xsk_umem_info *umem)
-{
- return umem->num_frames * umem->frame_size;
-}
-
-static int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem, void *buffer,
- u64 size)
-{
- struct xsk_umem_config cfg = {
- .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
- .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
- .frame_size = umem->frame_size,
- .frame_headroom = umem->frame_headroom,
- .flags = XSK_UMEM__DEFAULT_FLAGS
- };
- int ret;
-
- if (umem->fill_size)
- cfg.fill_size = umem->fill_size;
-
- if (umem->comp_size)
- cfg.comp_size = umem->comp_size;
-
- if (umem->unaligned_mode)
- cfg.flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG;
-
- ret = xsk_umem__create(&umem->umem, buffer, size,
- &umem->fq, &umem->cq, &cfg);
- if (ret)
- return ret;
-
- umem->buffer = buffer;
- if (ifobj->shared_umem && ifobj->rx_on) {
- umem->base_addr = umem_size(umem);
- umem->next_buffer = umem_size(umem);
- }
-
- return 0;
-}
-
-static u64 umem_alloc_buffer(struct xsk_umem_info *umem)
-{
- u64 addr;
-
- addr = umem->next_buffer;
- umem->next_buffer += umem->frame_size;
- if (umem->next_buffer >= umem->base_addr + umem_size(umem))
- umem->next_buffer = umem->base_addr;
-
- return addr;
-}
-
-static void umem_reset_alloc(struct xsk_umem_info *umem)
-{
- umem->next_buffer = 0;
-}
-
-static void enable_busy_poll(struct xsk_socket_info *xsk)
-{
- int sock_opt;
-
- sock_opt = 1;
- if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_PREFER_BUSY_POLL,
- (void *)&sock_opt, sizeof(sock_opt)) < 0)
- exit_with_error(errno);
-
- sock_opt = 20;
- if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL,
- (void *)&sock_opt, sizeof(sock_opt)) < 0)
- exit_with_error(errno);
-
- sock_opt = xsk->batch_size;
- if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL_BUDGET,
- (void *)&sock_opt, sizeof(sock_opt)) < 0)
- exit_with_error(errno);
-}
-
-static int __xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem,
- struct ifobject *ifobject, bool shared)
-{
- struct xsk_socket_config cfg = {};
- struct xsk_ring_cons *rxr;
- struct xsk_ring_prod *txr;
-
- xsk->umem = umem;
- cfg.rx_size = xsk->rxqsize;
- cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
- cfg.bind_flags = ifobject->bind_flags;
- if (shared)
- cfg.bind_flags |= XDP_SHARED_UMEM;
- if (ifobject->mtu > MAX_ETH_PKT_SIZE)
- cfg.bind_flags |= XDP_USE_SG;
- if (umem->comp_size)
- cfg.tx_size = umem->comp_size;
- if (umem->fill_size)
- cfg.rx_size = umem->fill_size;
-
- txr = ifobject->tx_on ? &xsk->tx : NULL;
- rxr = ifobject->rx_on ? &xsk->rx : NULL;
- return xsk_socket__create(&xsk->xsk, ifobject->ifindex, 0, umem->umem, rxr, txr, &cfg);
-}
-
static bool ifobj_zc_avail(struct ifobject *ifobject)
{
size_t umem_sz = DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE;
@@ -314,7 +137,7 @@ static bool ifobj_zc_avail(struct ifobject *ifobject)
ifobject->bind_flags = XDP_USE_NEED_WAKEUP | XDP_ZEROCOPY;
ifobject->rx_on = true;
xsk->rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
- ret = __xsk_configure_socket(xsk, umem, ifobject, false);
+ ret = xsk_configure_socket(xsk, umem, ifobject, false);
if (!ret)
zc_avail = true;

@@ -327,25 +150,6 @@ static bool ifobj_zc_avail(struct ifobject *ifobject)
return zc_avail;
}

-#define MAX_SKB_FRAGS_PATH "/proc/sys/net/core/max_skb_frags"
-static unsigned int get_max_skb_frags(void)
-{
- unsigned int max_skb_frags = 0;
- FILE *file;
-
- file = fopen(MAX_SKB_FRAGS_PATH, "r");
- if (!file) {
- ksft_print_msg("Error opening %s\n", MAX_SKB_FRAGS_PATH);
- return 0;
- }
-
- if (fscanf(file, "%u", &max_skb_frags) != 1)
- ksft_print_msg("Error reading %s\n", MAX_SKB_FRAGS_PATH);
-
- fclose(file);
- return max_skb_frags;
-}
-
static struct option long_options[] = {
{"interface", required_argument, 0, 'i'},
{"busy-poll", no_argument, 0, 'b'},
@@ -446,2327 +250,66 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj
}
}

-static int set_ring_size(struct ifobject *ifobj)
-{
- int ret;
- u32 ctr = 0;
-
- while (ctr++ < SOCK_RECONF_CTR) {
- ret = set_hw_ring_size(ifobj->ifname, &ifobj->ring);
- if (!ret)
- break;
-
- /* Retry if it fails */
- if (ctr >= SOCK_RECONF_CTR || errno != EBUSY)
- return -errno;
-
- usleep(USLEEP_MAX);
- }
-
- return ret;
-}
-
-static int hw_ring_size_reset(struct ifobject *ifobj)
-{
- ifobj->ring.tx_pending = ifobj->set_ring.default_tx;
- ifobj->ring.rx_pending = ifobj->set_ring.default_rx;
- return set_ring_size(ifobj);
-}
-
-static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
- struct ifobject *ifobj_rx)
+static void xsk_unload_xdp_programs(struct ifobject *ifobj)
{
- u32 i, j;
-
- for (i = 0; i < MAX_INTERFACES; i++) {
- struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
-
- ifobj->xsk = &ifobj->xsk_arr[0];
- ifobj->use_poll = false;
- ifobj->use_fill_ring = true;
- ifobj->release_rx = true;
- ifobj->validation_func = NULL;
- ifobj->use_metadata = false;
-
- if (i == 0) {
- ifobj->rx_on = false;
- ifobj->tx_on = true;
- } else {
- ifobj->rx_on = true;
- ifobj->tx_on = false;
- }
-
- memset(ifobj->umem, 0, sizeof(*ifobj->umem));
- ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS;
- ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
-
- for (j = 0; j < MAX_SOCKETS; j++) {
- memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j]));
- ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
- ifobj->xsk_arr[j].batch_size = DEFAULT_BATCH_SIZE;
- if (i == 0)
- ifobj->xsk_arr[j].pkt_stream = test->tx_pkt_stream_default;
- else
- ifobj->xsk_arr[j].pkt_stream = test->rx_pkt_stream_default;
-
- memcpy(ifobj->xsk_arr[j].src_mac, g_mac, ETH_ALEN);
- memcpy(ifobj->xsk_arr[j].dst_mac, g_mac, ETH_ALEN);
- ifobj->xsk_arr[j].src_mac[5] += ((j * 2) + 0);
- ifobj->xsk_arr[j].dst_mac[5] += ((j * 2) + 1);
- }
- }
-
- if (ifobj_tx->hw_ring_size_supp)
- hw_ring_size_reset(ifobj_tx);
-
- test->ifobj_tx = ifobj_tx;
- test->ifobj_rx = ifobj_rx;
- test->current_step = 0;
- test->total_steps = 1;
- test->nb_sockets = 1;
- test->fail = false;
- test->set_ring = false;
- test->adjust_tail = false;
- test->adjust_tail_support = false;
- test->mtu = MAX_ETH_PKT_SIZE;
- test->xdp_prog_rx = ifobj_rx->xdp_progs->progs.xsk_def_prog;
- test->xskmap_rx = ifobj_rx->xdp_progs->maps.xsk;
- test->xdp_prog_tx = ifobj_tx->xdp_progs->progs.xsk_def_prog;
- test->xskmap_tx = ifobj_tx->xdp_progs->maps.xsk;
+ xsk_xdp_progs__destroy(ifobj->xdp_progs);
}

-static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
- struct ifobject *ifobj_rx, enum test_mode mode,
- const struct test_spec *test_to_run)
+static void run_pkt_test(struct test_spec *test)
{
- struct pkt_stream *tx_pkt_stream;
- struct pkt_stream *rx_pkt_stream;
- u32 i;
-
- tx_pkt_stream = test->tx_pkt_stream_default;
- rx_pkt_stream = test->rx_pkt_stream_default;
- memset(test, 0, sizeof(*test));
- test->tx_pkt_stream_default = tx_pkt_stream;
- test->rx_pkt_stream_default = rx_pkt_stream;
+ int ret;

- for (i = 0; i < MAX_INTERFACES; i++) {
- struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
+ ret = test->test_func(test);

- ifobj->bind_flags = XDP_USE_NEED_WAKEUP;
- if (mode == TEST_MODE_ZC)
- ifobj->bind_flags |= XDP_ZEROCOPY;
- else
- ifobj->bind_flags |= XDP_COPY;
+ switch (ret) {
+ case TEST_PASS:
+ ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test),
+ test->name);
+ break;
+ case TEST_SKIP:
+ ksft_test_result_skip("SKIP: %s %s%s\n", mode_string(test), busy_poll_string(test),
+ test->name);
+ break;
+ case TEST_FAILURE:
+ ksft_test_result_fail("FAIL: %s %s%s\n", mode_string(test), busy_poll_string(test),
+ test->name);
+ break;
+ default:
+ ksft_test_result_fail("FAIL: %s %s%s -- Unexpected returned value (%d)\n",
+ mode_string(test), busy_poll_string(test), test->name, ret);
}

- strncpy(test->name, test_to_run->name, MAX_TEST_NAME_SIZE);
- test->test_func = test_to_run->test_func;
- test->mode = mode;
- __test_spec_init(test, ifobj_tx, ifobj_rx);
-}
-
-static void test_spec_reset(struct test_spec *test)
-{
- __test_spec_init(test, test->ifobj_tx, test->ifobj_rx);
+ pkt_stream_restore_default(test);
}

-static void test_spec_set_xdp_prog(struct test_spec *test, struct bpf_program *xdp_prog_rx,
- struct bpf_program *xdp_prog_tx, struct bpf_map *xskmap_rx,
- struct bpf_map *xskmap_tx)
+static bool is_xdp_supported(int ifindex)
{
- test->xdp_prog_rx = xdp_prog_rx;
- test->xdp_prog_tx = xdp_prog_tx;
- test->xskmap_rx = xskmap_rx;
- test->xskmap_tx = xskmap_tx;
-}
+ int flags = XDP_FLAGS_DRV_MODE;

-static int test_spec_set_mtu(struct test_spec *test, int mtu)
-{
+ LIBBPF_OPTS(bpf_link_create_opts, opts, .flags = flags);
+ struct bpf_insn insns[2] = {
+ BPF_MOV64_IMM(BPF_REG_0, XDP_PASS),
+ BPF_EXIT_INSN()
+ };
+ int prog_fd, insn_cnt = ARRAY_SIZE(insns);
int err;

- if (test->ifobj_rx->mtu != mtu) {
- err = xsk_set_mtu(test->ifobj_rx->ifindex, mtu);
- if (err)
- return err;
- test->ifobj_rx->mtu = mtu;
- }
- if (test->ifobj_tx->mtu != mtu) {
- err = xsk_set_mtu(test->ifobj_tx->ifindex, mtu);
- if (err)
- return err;
- test->ifobj_tx->mtu = mtu;
- }
-
- return 0;
-}
-
-static void pkt_stream_reset(struct pkt_stream *pkt_stream)
-{
- if (pkt_stream) {
- pkt_stream->current_pkt_nb = 0;
- pkt_stream->nb_rx_pkts = 0;
- }
-}
-
-static struct pkt *pkt_stream_get_next_tx_pkt(struct pkt_stream *pkt_stream)
-{
- if (pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts)
- return NULL;
-
- return &pkt_stream->pkts[pkt_stream->current_pkt_nb++];
-}
-
-static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream, u32 *pkts_sent)
-{
- while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) {
- (*pkts_sent)++;
- if (pkt_stream->pkts[pkt_stream->current_pkt_nb].valid)
- return &pkt_stream->pkts[pkt_stream->current_pkt_nb++];
- pkt_stream->current_pkt_nb++;
- }
- return NULL;
-}
-
-static void pkt_stream_delete(struct pkt_stream *pkt_stream)
-{
- free(pkt_stream->pkts);
- free(pkt_stream);
-}
-
-static void pkt_stream_restore_default(struct test_spec *test)
-{
- struct pkt_stream *tx_pkt_stream = test->ifobj_tx->xsk->pkt_stream;
- struct pkt_stream *rx_pkt_stream = test->ifobj_rx->xsk->pkt_stream;
-
- if (tx_pkt_stream != test->tx_pkt_stream_default) {
- pkt_stream_delete(test->ifobj_tx->xsk->pkt_stream);
- test->ifobj_tx->xsk->pkt_stream = test->tx_pkt_stream_default;
- }
-
- if (rx_pkt_stream != test->rx_pkt_stream_default) {
- pkt_stream_delete(test->ifobj_rx->xsk->pkt_stream);
- test->ifobj_rx->xsk->pkt_stream = test->rx_pkt_stream_default;
- }
-}
-
-static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts)
-{
- struct pkt_stream *pkt_stream;
-
- pkt_stream = calloc(1, sizeof(*pkt_stream));
- if (!pkt_stream)
- return NULL;
-
- pkt_stream->pkts = calloc(nb_pkts, sizeof(*pkt_stream->pkts));
- if (!pkt_stream->pkts) {
- free(pkt_stream);
- return NULL;
- }
-
- pkt_stream->nb_pkts = nb_pkts;
- return pkt_stream;
-}
-
-static bool pkt_continues(u32 options)
-{
- return options & XDP_PKT_CONTD;
-}
-
-static u32 ceil_u32(u32 a, u32 b)
-{
- return (a + b - 1) / b;
-}
-
-static u32 pkt_nb_frags(u32 frame_size, struct pkt_stream *pkt_stream, struct pkt *pkt)
-{
- u32 nb_frags = 1, next_frag;
-
- if (!pkt)
- return 1;
-
- if (!pkt_stream->verbatim) {
- if (!pkt->valid || !pkt->len)
- return 1;
- return ceil_u32(pkt->len, frame_size);
- }
-
- /* Search for the end of the packet in verbatim mode */
- if (!pkt_continues(pkt->options))
- return nb_frags;
-
- next_frag = pkt_stream->current_pkt_nb;
- pkt++;
- while (next_frag++ < pkt_stream->nb_pkts) {
- nb_frags++;
- if (!pkt_continues(pkt->options) || !pkt->valid)
- break;
- pkt++;
- }
- return nb_frags;
-}
-
-static bool set_pkt_valid(int offset, u32 len)
-{
- return len <= MAX_ETH_JUMBO_SIZE;
-}
-
-static void pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len)
-{
- pkt->offset = offset;
- pkt->len = len;
- pkt->valid = set_pkt_valid(offset, len);
-}
-
-static void pkt_stream_pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len)
-{
- bool prev_pkt_valid = pkt->valid;
-
- pkt_set(pkt_stream, pkt, offset, len);
- pkt_stream->nb_valid_entries += pkt->valid - prev_pkt_valid;
-}
-
-static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len)
-{
- return ceil_u32(len, umem->frame_size) * umem->frame_size;
-}
-
-static struct pkt_stream *__pkt_stream_generate(u32 nb_pkts, u32 pkt_len, u32 nb_start, u32 nb_off)
-{
- struct pkt_stream *pkt_stream;
- u32 i;
-
- pkt_stream = __pkt_stream_alloc(nb_pkts);
- if (!pkt_stream)
- exit_with_error(ENOMEM);
-
- pkt_stream->nb_pkts = nb_pkts;
- pkt_stream->max_pkt_len = pkt_len;
- for (i = 0; i < nb_pkts; i++) {
- struct pkt *pkt = &pkt_stream->pkts[i];
-
- pkt_stream_pkt_set(pkt_stream, pkt, 0, pkt_len);
- pkt->pkt_nb = nb_start + i * nb_off;
- }
-
- return pkt_stream;
-}
-
-static struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len)
-{
- return __pkt_stream_generate(nb_pkts, pkt_len, 0, 1);
-}
-
-static struct pkt_stream *pkt_stream_clone(struct pkt_stream *pkt_stream)
-{
- return pkt_stream_generate(pkt_stream->nb_pkts, pkt_stream->pkts[0].len);
-}
-
-static void pkt_stream_replace_ifobject(struct ifobject *ifobj, u32 nb_pkts, u32 pkt_len)
-{
- ifobj->xsk->pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
-}
-
-static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len)
-{
- pkt_stream_replace_ifobject(test->ifobj_tx, nb_pkts, pkt_len);
- pkt_stream_replace_ifobject(test->ifobj_rx, nb_pkts, pkt_len);
-}
-
-static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len,
- int offset)
-{
- struct pkt_stream *pkt_stream;
- u32 i;
-
- pkt_stream = pkt_stream_clone(ifobj->xsk->pkt_stream);
- for (i = 1; i < ifobj->xsk->pkt_stream->nb_pkts; i += 2)
- pkt_stream_pkt_set(pkt_stream, &pkt_stream->pkts[i], offset, pkt_len);
-
- ifobj->xsk->pkt_stream = pkt_stream;
-}
-
-static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset)
-{
- __pkt_stream_replace_half(test->ifobj_tx, pkt_len, offset);
- __pkt_stream_replace_half(test->ifobj_rx, pkt_len, offset);
-}
-
-static void pkt_stream_receive_half(struct test_spec *test)
-{
- struct pkt_stream *pkt_stream = test->ifobj_tx->xsk->pkt_stream;
- u32 i;
-
- test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(pkt_stream->nb_pkts,
- pkt_stream->pkts[0].len);
- pkt_stream = test->ifobj_rx->xsk->pkt_stream;
- for (i = 1; i < pkt_stream->nb_pkts; i += 2)
- pkt_stream->pkts[i].valid = false;
-
- pkt_stream->nb_valid_entries /= 2;
-}
-
-static void pkt_stream_even_odd_sequence(struct test_spec *test)
-{
- struct pkt_stream *pkt_stream;
- u32 i;
-
- for (i = 0; i < test->nb_sockets; i++) {
- pkt_stream = test->ifobj_tx->xsk_arr[i].pkt_stream;
- pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2,
- pkt_stream->pkts[0].len, i, 2);
- test->ifobj_tx->xsk_arr[i].pkt_stream = pkt_stream;
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL);
+ if (prog_fd < 0)
+ return false;

- pkt_stream = test->ifobj_rx->xsk_arr[i].pkt_stream;
- pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2,
- pkt_stream->pkts[0].len, i, 2);
- test->ifobj_rx->xsk_arr[i].pkt_stream = pkt_stream;
+ err = bpf_xdp_attach(ifindex, prog_fd, flags, NULL);
+ if (err) {
+ close(prog_fd);
+ return false;
}
-}
-
-static u64 pkt_get_addr(struct pkt *pkt, struct xsk_umem_info *umem)
-{
- if (!pkt->valid)
- return pkt->offset;
- return pkt->offset + umem_alloc_buffer(umem);
-}

-static void pkt_stream_cancel(struct pkt_stream *pkt_stream)
-{
- pkt_stream->current_pkt_nb--;
-}
-
-static void pkt_generate(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, u64 addr, u32 len,
- u32 pkt_nb, u32 bytes_written)
-{
- void *data = xsk_umem__get_data(umem->buffer, addr);
-
- if (len < MIN_PKT_SIZE)
- return;
-
- if (!bytes_written) {
- gen_eth_hdr(xsk, data);
-
- len -= PKT_HDR_SIZE;
- data += PKT_HDR_SIZE;
- } else {
- bytes_written -= PKT_HDR_SIZE;
- }
+ bpf_xdp_detach(ifindex, flags, NULL);
+ close(prog_fd);

- write_payload(data, pkt_nb, bytes_written, len);
+ return true;
}

-static struct pkt_stream *__pkt_stream_generate_custom(struct ifobject *ifobj, struct pkt *frames,
- u32 nb_frames, bool verbatim)
-{
- u32 i, len = 0, pkt_nb = 0, payload = 0;
- struct pkt_stream *pkt_stream;
-
- pkt_stream = __pkt_stream_alloc(nb_frames);
- if (!pkt_stream)
- exit_with_error(ENOMEM);
-
- for (i = 0; i < nb_frames; i++) {
- struct pkt *pkt = &pkt_stream->pkts[pkt_nb];
- struct pkt *frame = &frames[i];
-
- pkt->offset = frame->offset;
- if (verbatim) {
- *pkt = *frame;
- pkt->pkt_nb = payload;
- if (!frame->valid || !pkt_continues(frame->options))
- payload++;
- } else {
- if (frame->valid)
- len += frame->len;
- if (frame->valid && pkt_continues(frame->options))
- continue;
-
- pkt->pkt_nb = pkt_nb;
- pkt->len = len;
- pkt->valid = frame->valid;
- pkt->options = 0;
-
- len = 0;
- }
-
- print_verbose("offset: %d len: %u valid: %u options: %u pkt_nb: %u\n",
- pkt->offset, pkt->len, pkt->valid, pkt->options, pkt->pkt_nb);
-
- if (pkt->valid && pkt->len > pkt_stream->max_pkt_len)
- pkt_stream->max_pkt_len = pkt->len;
-
- if (pkt->valid)
- pkt_stream->nb_valid_entries++;
-
- pkt_nb++;
- }
-
- pkt_stream->nb_pkts = pkt_nb;
- pkt_stream->verbatim = verbatim;
- return pkt_stream;
-}
-
-static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, u32 nb_pkts)
-{
- struct pkt_stream *pkt_stream;
-
- pkt_stream = __pkt_stream_generate_custom(test->ifobj_tx, pkts, nb_pkts, true);
- test->ifobj_tx->xsk->pkt_stream = pkt_stream;
-
- pkt_stream = __pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts, false);
- test->ifobj_rx->xsk->pkt_stream = pkt_stream;
-}
-
-static void pkt_print_data(u32 *data, u32 cnt)
-{
- u32 i;
-
- for (i = 0; i < cnt; i++) {
- u32 seqnum, pkt_nb;
-
- seqnum = ntohl(*data) & 0xffff;
- pkt_nb = ntohl(*data) >> 16;
- ksft_print_msg("%u:%u ", pkt_nb, seqnum);
- data++;
- }
-}
-
-static void pkt_dump(void *pkt, u32 len, bool eth_header)
-{
- struct ethhdr *ethhdr = pkt;
- u32 i, *data;
-
- if (eth_header) {
- /*extract L2 frame */
- ksft_print_msg("DEBUG>> L2: dst mac: ");
- for (i = 0; i < ETH_ALEN; i++)
- ksft_print_msg("%02X", ethhdr->h_dest[i]);
-
- ksft_print_msg("\nDEBUG>> L2: src mac: ");
- for (i = 0; i < ETH_ALEN; i++)
- ksft_print_msg("%02X", ethhdr->h_source[i]);
-
- data = pkt + PKT_HDR_SIZE;
- } else {
- data = pkt;
- }
-
- /*extract L5 frame */
- ksft_print_msg("\nDEBUG>> L5: seqnum: ");
- pkt_print_data(data, PKT_DUMP_NB_TO_PRINT);
- ksft_print_msg("....");
- if (len > PKT_DUMP_NB_TO_PRINT * sizeof(u32)) {
- ksft_print_msg("\n.... ");
- pkt_print_data(data + len / sizeof(u32) - PKT_DUMP_NB_TO_PRINT,
- PKT_DUMP_NB_TO_PRINT);
- }
- ksft_print_msg("\n---------------------------------------\n");
-}
-
-static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr)
-{
- u32 headroom = umem->unaligned_mode ? 0 : umem->frame_headroom;
- u32 offset = addr % umem->frame_size, expected_offset;
- int pkt_offset = pkt->valid ? pkt->offset : 0;
-
- if (!umem->unaligned_mode)
- pkt_offset = 0;
-
- expected_offset = (pkt_offset + headroom + XDP_PACKET_HEADROOM) % umem->frame_size;
-
- if (offset == expected_offset)
- return true;
-
- ksft_print_msg("[%s] expected [%u], got [%u]\n", __func__, expected_offset, offset);
- return false;
-}
-
-static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr)
-{
- void *data = xsk_umem__get_data(buffer, addr);
- struct xdp_info *meta = data - sizeof(struct xdp_info);
-
- if (meta->count != pkt->pkt_nb) {
- ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%llu]\n",
- __func__, pkt->pkt_nb,
- (unsigned long long)meta->count);
- return false;
- }
-
- return true;
-}
-
-static bool is_adjust_tail_supported(struct xsk_xdp_progs *skel_rx)
-{
- struct bpf_map *data_map;
- int adjust_value = 0;
- int key = 0;
- int ret;
-
- data_map = bpf_object__find_map_by_name(skel_rx->obj, "xsk_xdp_.bss");
- if (!data_map || !bpf_map__is_internal(data_map)) {
- ksft_print_msg("Error: could not find bss section of XDP program\n");
- exit_with_error(errno);
- }
-
- ret = bpf_map_lookup_elem(bpf_map__fd(data_map), &key, &adjust_value);
- if (ret) {
- ksft_print_msg("Error: bpf_map_lookup_elem failed with error %d\n", ret);
- exit_with_error(errno);
- }
-
- /* Set the 'adjust_value' variable to -EOPNOTSUPP in the XDP program if the adjust_tail
- * helper is not supported. Skip the adjust_tail test case in this scenario.
- */
- return adjust_value != -EOPNOTSUPP;
-}
-
-static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 expected_pkt_nb,
- u32 bytes_processed)
-{
- u32 seqnum, pkt_nb, *pkt_data, words_to_end, expected_seqnum;
- void *data = xsk_umem__get_data(umem->buffer, addr);
-
- addr -= umem->base_addr;
-
- if (addr >= umem->num_frames * umem->frame_size ||
- addr + len > umem->num_frames * umem->frame_size) {
- ksft_print_msg("Frag invalid addr: %llx len: %u\n",
- (unsigned long long)addr, len);
- return false;
- }
- if (!umem->unaligned_mode && addr % umem->frame_size + len > umem->frame_size) {
- ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n",
- (unsigned long long)addr, len);
- return false;
- }
-
- pkt_data = data;
- if (!bytes_processed) {
- pkt_data += PKT_HDR_SIZE / sizeof(*pkt_data);
- len -= PKT_HDR_SIZE;
- } else {
- bytes_processed -= PKT_HDR_SIZE;
- }
-
- expected_seqnum = bytes_processed / sizeof(*pkt_data);
- seqnum = ntohl(*pkt_data) & 0xffff;
- pkt_nb = ntohl(*pkt_data) >> 16;
-
- if (expected_pkt_nb != pkt_nb) {
- ksft_print_msg("[%s] expected pkt_nb [%u], got pkt_nb [%u]\n",
- __func__, expected_pkt_nb, pkt_nb);
- goto error;
- }
- if (expected_seqnum != seqnum) {
- ksft_print_msg("[%s] expected seqnum at start [%u], got seqnum [%u]\n",
- __func__, expected_seqnum, seqnum);
- goto error;
- }
-
- words_to_end = len / sizeof(*pkt_data) - 1;
- pkt_data += words_to_end;
- seqnum = ntohl(*pkt_data) & 0xffff;
- expected_seqnum += words_to_end;
- if (expected_seqnum != seqnum) {
- ksft_print_msg("[%s] expected seqnum at end [%u], got seqnum [%u]\n",
- __func__, expected_seqnum, seqnum);
- goto error;
- }
-
- return true;
-
-error:
- pkt_dump(data, len, !bytes_processed);
- return false;
-}
-
-static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
-{
- if (pkt->len != len) {
- ksft_print_msg("[%s] expected packet length [%d], got length [%d]\n",
- __func__, pkt->len, len);
- pkt_dump(xsk_umem__get_data(buffer, addr), len, true);
- return false;
- }
-
- return true;
-}
-
-static u32 load_value(u32 *counter)
-{
- return __atomic_load_n(counter, __ATOMIC_ACQUIRE);
-}
-
-static bool kick_tx_with_check(struct xsk_socket_info *xsk, int *ret)
-{
- u32 max_budget = MAX_TX_BUDGET_DEFAULT;
- u32 cons, ready_to_send;
- int delta;
-
- cons = load_value(xsk->tx.consumer);
- ready_to_send = load_value(xsk->tx.producer) - cons;
- *ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
-
- delta = load_value(xsk->tx.consumer) - cons;
- /* By default, xsk should consume exact @max_budget descs at one
- * send in this case where hitting the max budget limit in while
- * loop is triggered in __xsk_generic_xmit(). Please make sure that
- * the number of descs to be sent is larger than @max_budget, or
- * else the tx.consumer will be updated in xskq_cons_peek_desc()
- * in time which hides the issue we try to verify.
- */
- if (ready_to_send > max_budget && delta != max_budget)
- return false;
-
- return true;
-}
-
-static int kick_tx(struct xsk_socket_info *xsk)
-{
- int ret;
-
- if (xsk->check_consumer) {
- if (!kick_tx_with_check(xsk, &ret))
- return TEST_FAILURE;
- } else {
- ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
- }
- if (ret >= 0)
- return TEST_PASS;
- if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) {
- usleep(100);
- return TEST_PASS;
- }
- return TEST_FAILURE;
-}
-
-static int kick_rx(struct xsk_socket_info *xsk)
-{
- int ret;
-
- ret = recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL);
- if (ret < 0)
- return TEST_FAILURE;
-
- return TEST_PASS;
-}
-
-static int complete_pkts(struct xsk_socket_info *xsk, int batch_size)
-{
- unsigned int rcvd;
- u32 idx;
- int ret;
-
- if (xsk_ring_prod__needs_wakeup(&xsk->tx)) {
- ret = kick_tx(xsk);
- if (ret)
- return TEST_FAILURE;
- }
-
- rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
- if (rcvd) {
- if (rcvd > xsk->outstanding_tx) {
- u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1);
-
- ksft_print_msg("[%s] Too many packets completed\n", __func__);
- ksft_print_msg("Last completion address: %llx\n",
- (unsigned long long)addr);
- return TEST_FAILURE;
- }
-
- xsk_ring_cons__release(&xsk->umem->cq, rcvd);
- xsk->outstanding_tx -= rcvd;
- }
-
- return TEST_PASS;
-}
-
-static int __receive_pkts(struct test_spec *test, struct xsk_socket_info *xsk)
-{
- u32 frags_processed = 0, nb_frags = 0, pkt_len = 0;
- u32 idx_rx = 0, idx_fq = 0, rcvd, pkts_sent = 0;
- struct pkt_stream *pkt_stream = xsk->pkt_stream;
- struct ifobject *ifobj = test->ifobj_rx;
- struct xsk_umem_info *umem = xsk->umem;
- struct pollfd fds = { };
- struct pkt *pkt;
- u64 first_addr = 0;
- int ret;
-
- fds.fd = xsk_socket__fd(xsk->xsk);
- fds.events = POLLIN;
-
- ret = kick_rx(xsk);
- if (ret)
- return TEST_FAILURE;
-
- if (ifobj->use_poll) {
- ret = poll(&fds, 1, POLL_TMOUT);
- if (ret < 0)
- return TEST_FAILURE;
-
- if (!ret) {
- if (!is_umem_valid(test->ifobj_tx))
- return TEST_PASS;
-
- ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__);
- return TEST_CONTINUE;
- }
-
- if (!(fds.revents & POLLIN))
- return TEST_CONTINUE;
- }
-
- rcvd = xsk_ring_cons__peek(&xsk->rx, xsk->batch_size, &idx_rx);
- if (!rcvd)
- return TEST_CONTINUE;
-
- if (ifobj->use_fill_ring) {
- ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
- while (ret != rcvd) {
- if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
- ret = poll(&fds, 1, POLL_TMOUT);
- if (ret < 0)
- return TEST_FAILURE;
- }
- ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
- }
- }
-
- while (frags_processed < rcvd) {
- const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
- u64 addr = desc->addr, orig;
-
- orig = xsk_umem__extract_addr(addr);
- addr = xsk_umem__add_offset_to_addr(addr);
-
- if (!nb_frags) {
- pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent);
- if (!pkt) {
- ksft_print_msg("[%s] received too many packets addr: %lx len %u\n",
- __func__, addr, desc->len);
- return TEST_FAILURE;
- }
- }
-
- print_verbose("Rx: addr: %lx len: %u options: %u pkt_nb: %u valid: %u\n",
- addr, desc->len, desc->options, pkt->pkt_nb, pkt->valid);
-
- if (!is_frag_valid(umem, addr, desc->len, pkt->pkt_nb, pkt_len) ||
- !is_offset_correct(umem, pkt, addr) || (ifobj->use_metadata &&
- !is_metadata_correct(pkt, umem->buffer, addr)))
- return TEST_FAILURE;
-
- if (!nb_frags++)
- first_addr = addr;
- frags_processed++;
- pkt_len += desc->len;
- if (ifobj->use_fill_ring)
- *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig;
-
- if (pkt_continues(desc->options))
- continue;
-
- /* The complete packet has been received */
- if (!is_pkt_valid(pkt, umem->buffer, first_addr, pkt_len) ||
- !is_offset_correct(umem, pkt, addr))
- return TEST_FAILURE;
-
- pkt_stream->nb_rx_pkts++;
- nb_frags = 0;
- pkt_len = 0;
- }
-
- if (nb_frags) {
- /* In the middle of a packet. Start over from beginning of packet. */
- idx_rx -= nb_frags;
- xsk_ring_cons__cancel(&xsk->rx, nb_frags);
- if (ifobj->use_fill_ring) {
- idx_fq -= nb_frags;
- xsk_ring_prod__cancel(&umem->fq, nb_frags);
- }
- frags_processed -= nb_frags;
- }
-
- if (ifobj->use_fill_ring)
- xsk_ring_prod__submit(&umem->fq, frags_processed);
- if (ifobj->release_rx)
- xsk_ring_cons__release(&xsk->rx, frags_processed);
-
- pthread_mutex_lock(&pacing_mutex);
- pkts_in_flight -= pkts_sent;
- pthread_mutex_unlock(&pacing_mutex);
- pkts_sent = 0;
-
-return TEST_CONTINUE;
-}
-
-bool all_packets_received(struct test_spec *test, struct xsk_socket_info *xsk, u32 sock_num,
- unsigned long *bitmap)
-{
- struct pkt_stream *pkt_stream = xsk->pkt_stream;
-
- if (!pkt_stream) {
- __set_bit(sock_num, bitmap);
- return false;
- }
-
- if (pkt_stream->nb_rx_pkts == pkt_stream->nb_valid_entries) {
- __set_bit(sock_num, bitmap);
- if (bitmap_full(bitmap, test->nb_sockets))
- return true;
- }
-
- return false;
-}
-
-static int receive_pkts(struct test_spec *test)
-{
- struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0};
- DECLARE_BITMAP(bitmap, test->nb_sockets);
- struct xsk_socket_info *xsk;
- u32 sock_num = 0;
- int res, ret;
-
- ret = gettimeofday(&tv_now, NULL);
- if (ret)
- exit_with_error(errno);
-
- timeradd(&tv_now, &tv_timeout, &tv_end);
-
- while (1) {
- xsk = &test->ifobj_rx->xsk_arr[sock_num];
-
- if ((all_packets_received(test, xsk, sock_num, bitmap)))
- break;
-
- res = __receive_pkts(test, xsk);
- if (!(res == TEST_PASS || res == TEST_CONTINUE))
- return res;
-
- ret = gettimeofday(&tv_now, NULL);
- if (ret)
- exit_with_error(errno);
-
- if (timercmp(&tv_now, &tv_end, >)) {
- ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__);
- return TEST_FAILURE;
- }
- sock_num = (sock_num + 1) % test->nb_sockets;
- }
-
- return TEST_PASS;
-}
-
-static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, bool timeout)
-{
- u32 i, idx = 0, valid_pkts = 0, valid_frags = 0, buffer_len;
- struct pkt_stream *pkt_stream = xsk->pkt_stream;
- struct xsk_umem_info *umem = ifobject->umem;
- bool use_poll = ifobject->use_poll;
- struct pollfd fds = { };
- int ret;
-
- buffer_len = pkt_get_buffer_len(umem, pkt_stream->max_pkt_len);
- /* pkts_in_flight might be negative if many invalid packets are sent */
- if (pkts_in_flight >= (int)((umem_size(umem) - xsk->batch_size * buffer_len) /
- buffer_len)) {
- ret = kick_tx(xsk);
- if (ret)
- return TEST_FAILURE;
- return TEST_CONTINUE;
- }
-
- fds.fd = xsk_socket__fd(xsk->xsk);
- fds.events = POLLOUT;
-
- while (xsk_ring_prod__reserve(&xsk->tx, xsk->batch_size, &idx) < xsk->batch_size) {
- if (use_poll) {
- ret = poll(&fds, 1, POLL_TMOUT);
- if (timeout) {
- if (ret < 0) {
- ksft_print_msg("ERROR: [%s] Poll error %d\n",
- __func__, errno);
- return TEST_FAILURE;
- }
- if (ret == 0)
- return TEST_PASS;
- break;
- }
- if (ret <= 0) {
- ksft_print_msg("ERROR: [%s] Poll error %d\n",
- __func__, errno);
- return TEST_FAILURE;
- }
- }
-
- complete_pkts(xsk, xsk->batch_size);
- }
-
- for (i = 0; i < xsk->batch_size; i++) {
- struct pkt *pkt = pkt_stream_get_next_tx_pkt(pkt_stream);
- u32 nb_frags_left, nb_frags, bytes_written = 0;
-
- if (!pkt)
- break;
-
- nb_frags = pkt_nb_frags(umem->frame_size, pkt_stream, pkt);
- if (nb_frags > xsk->batch_size - i) {
- pkt_stream_cancel(pkt_stream);
- xsk_ring_prod__cancel(&xsk->tx, xsk->batch_size - i);
- break;
- }
- nb_frags_left = nb_frags;
-
- while (nb_frags_left--) {
- struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
-
- tx_desc->addr = pkt_get_addr(pkt, ifobject->umem);
- if (pkt_stream->verbatim) {
- tx_desc->len = pkt->len;
- tx_desc->options = pkt->options;
- } else if (nb_frags_left) {
- tx_desc->len = umem->frame_size;
- tx_desc->options = XDP_PKT_CONTD;
- } else {
- tx_desc->len = pkt->len - bytes_written;
- tx_desc->options = 0;
- }
- if (pkt->valid)
- pkt_generate(xsk, umem, tx_desc->addr, tx_desc->len, pkt->pkt_nb,
- bytes_written);
- bytes_written += tx_desc->len;
-
- print_verbose("Tx addr: %llx len: %u options: %u pkt_nb: %u\n",
- tx_desc->addr, tx_desc->len, tx_desc->options, pkt->pkt_nb);
-
- if (nb_frags_left) {
- i++;
- if (pkt_stream->verbatim)
- pkt = pkt_stream_get_next_tx_pkt(pkt_stream);
- }
- }
-
- if (pkt && pkt->valid) {
- valid_pkts++;
- valid_frags += nb_frags;
- }
- }
-
- pthread_mutex_lock(&pacing_mutex);
- pkts_in_flight += valid_pkts;
- pthread_mutex_unlock(&pacing_mutex);
-
- xsk_ring_prod__submit(&xsk->tx, i);
- xsk->outstanding_tx += valid_frags;
-
- if (use_poll) {
- ret = poll(&fds, 1, POLL_TMOUT);
- if (ret <= 0) {
- if (ret == 0 && timeout)
- return TEST_PASS;
-
- ksft_print_msg("ERROR: [%s] Poll error %d\n", __func__, ret);
- return TEST_FAILURE;
- }
- }
-
- if (!timeout) {
- if (complete_pkts(xsk, i))
- return TEST_FAILURE;
-
- usleep(10);
- return TEST_PASS;
- }
-
- return TEST_CONTINUE;
-}
-
-static int wait_for_tx_completion(struct xsk_socket_info *xsk)
-{
- struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0};
- int ret;
-
- ret = gettimeofday(&tv_now, NULL);
- if (ret)
- exit_with_error(errno);
- timeradd(&tv_now, &tv_timeout, &tv_end);
-
- while (xsk->outstanding_tx) {
- ret = gettimeofday(&tv_now, NULL);
- if (ret)
- exit_with_error(errno);
- if (timercmp(&tv_now, &tv_end, >)) {
- ksft_print_msg("ERROR: [%s] Transmission loop timed out\n", __func__);
- return TEST_FAILURE;
- }
-
- complete_pkts(xsk, xsk->batch_size);
- }
-
- return TEST_PASS;
-}
-
-bool all_packets_sent(struct test_spec *test, unsigned long *bitmap)
-{
- return bitmap_full(bitmap, test->nb_sockets);
-}
-
-static int send_pkts(struct test_spec *test, struct ifobject *ifobject)
-{
- bool timeout = !is_umem_valid(test->ifobj_rx);
- DECLARE_BITMAP(bitmap, test->nb_sockets);
- u32 i, ret;
-
- while (!(all_packets_sent(test, bitmap))) {
- for (i = 0; i < test->nb_sockets; i++) {
- struct pkt_stream *pkt_stream;
-
- pkt_stream = ifobject->xsk_arr[i].pkt_stream;
- if (!pkt_stream || pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts) {
- __set_bit(i, bitmap);
- continue;
- }
- ret = __send_pkts(ifobject, &ifobject->xsk_arr[i], timeout);
- if (ret == TEST_CONTINUE && !test->fail)
- continue;
-
- if ((ret || test->fail) && !timeout)
- return TEST_FAILURE;
-
- if (ret == TEST_PASS && timeout)
- return ret;
-
- ret = wait_for_tx_completion(&ifobject->xsk_arr[i]);
- if (ret)
- return TEST_FAILURE;
- }
- }
-
- return TEST_PASS;
-}
-
-static int get_xsk_stats(struct xsk_socket *xsk, struct xdp_statistics *stats)
-{
- int fd = xsk_socket__fd(xsk), err;
- socklen_t optlen, expected_len;
-
- optlen = sizeof(*stats);
- err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, stats, &optlen);
- if (err) {
- ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n",
- __func__, -err, strerror(-err));
- return TEST_FAILURE;
- }
-
- expected_len = sizeof(struct xdp_statistics);
- if (optlen != expected_len) {
- ksft_print_msg("[%s] getsockopt optlen error. Expected: %u got: %u\n",
- __func__, expected_len, optlen);
- return TEST_FAILURE;
- }
-
- return TEST_PASS;
-}
-
-static int validate_rx_dropped(struct ifobject *ifobject)
-{
- struct xsk_socket *xsk = ifobject->xsk->xsk;
- struct xdp_statistics stats;
- int err;
-
- err = kick_rx(ifobject->xsk);
- if (err)
- return TEST_FAILURE;
-
- err = get_xsk_stats(xsk, &stats);
- if (err)
- return TEST_FAILURE;
-
- /* The receiver calls getsockopt after receiving the last (valid)
- * packet which is not the final packet sent in this test (valid and
- * invalid packets are sent in alternating fashion with the final
- * packet being invalid). Since the last packet may or may not have
- * been dropped already, both outcomes must be allowed.
- */
- if (stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 ||
- stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 - 1)
- return TEST_PASS;
-
- return TEST_FAILURE;
-}
-
-static int validate_rx_full(struct ifobject *ifobject)
-{
- struct xsk_socket *xsk = ifobject->xsk->xsk;
- struct xdp_statistics stats;
- int err;
-
- usleep(1000);
- err = kick_rx(ifobject->xsk);
- if (err)
- return TEST_FAILURE;
-
- err = get_xsk_stats(xsk, &stats);
- if (err)
- return TEST_FAILURE;
-
- if (stats.rx_ring_full)
- return TEST_PASS;
-
- return TEST_FAILURE;
-}
-
-static int validate_fill_empty(struct ifobject *ifobject)
-{
- struct xsk_socket *xsk = ifobject->xsk->xsk;
- struct xdp_statistics stats;
- int err;
-
- usleep(1000);
- err = kick_rx(ifobject->xsk);
- if (err)
- return TEST_FAILURE;
-
- err = get_xsk_stats(xsk, &stats);
- if (err)
- return TEST_FAILURE;
-
- if (stats.rx_fill_ring_empty_descs)
- return TEST_PASS;
-
- return TEST_FAILURE;
-}
-
-static int validate_tx_invalid_descs(struct ifobject *ifobject)
-{
- struct xsk_socket *xsk = ifobject->xsk->xsk;
- int fd = xsk_socket__fd(xsk);
- struct xdp_statistics stats;
- socklen_t optlen;
- int err;
-
- optlen = sizeof(stats);
- err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
- if (err) {
- ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n",
- __func__, -err, strerror(-err));
- return TEST_FAILURE;
- }
-
- if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) {
- ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%llu] expected [%u]\n",
- __func__,
- (unsigned long long)stats.tx_invalid_descs,
- ifobject->xsk->pkt_stream->nb_pkts);
- return TEST_FAILURE;
- }
-
- return TEST_PASS;
-}
-
-static void xsk_configure_socket(struct test_spec *test, struct ifobject *ifobject,
- struct xsk_umem_info *umem, bool tx)
-{
- int i, ret;
-
- for (i = 0; i < test->nb_sockets; i++) {
- bool shared = (ifobject->shared_umem && tx) ? true : !!i;
- u32 ctr = 0;
-
- while (ctr++ < SOCK_RECONF_CTR) {
- ret = __xsk_configure_socket(&ifobject->xsk_arr[i], umem,
- ifobject, shared);
- if (!ret)
- break;
-
- /* Retry if it fails as xsk_socket__create() is asynchronous */
- if (ctr >= SOCK_RECONF_CTR)
- exit_with_error(-ret);
- usleep(USLEEP_MAX);
- }
- if (ifobject->busy_poll)
- enable_busy_poll(&ifobject->xsk_arr[i]);
- }
-}
-
-static void thread_common_ops_tx(struct test_spec *test, struct ifobject *ifobject)
-{
- xsk_configure_socket(test, ifobject, test->ifobj_rx->umem, true);
- ifobject->xsk = &ifobject->xsk_arr[0];
- ifobject->xskmap = test->ifobj_rx->xskmap;
- memcpy(ifobject->umem, test->ifobj_rx->umem, sizeof(struct xsk_umem_info));
- ifobject->umem->base_addr = 0;
-}
-
-static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream,
- bool fill_up)
-{
- u32 rx_frame_size = umem->frame_size - XDP_PACKET_HEADROOM;
- u32 idx = 0, filled = 0, buffers_to_fill, nb_pkts;
- int ret;
-
- if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
- buffers_to_fill = umem->num_frames;
- else
- buffers_to_fill = umem->fill_size;
-
- ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx);
- if (ret != buffers_to_fill)
- exit_with_error(ENOSPC);
-
- while (filled < buffers_to_fill) {
- struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &nb_pkts);
- u64 addr;
- u32 i;
-
- for (i = 0; i < pkt_nb_frags(rx_frame_size, pkt_stream, pkt); i++) {
- if (!pkt) {
- if (!fill_up)
- break;
- addr = filled * umem->frame_size + umem->base_addr;
- } else if (pkt->offset >= 0) {
- addr = pkt->offset % umem->frame_size + umem_alloc_buffer(umem);
- } else {
- addr = pkt->offset + umem_alloc_buffer(umem);
- }
-
- *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr;
- if (++filled >= buffers_to_fill)
- break;
- }
- }
- xsk_ring_prod__submit(&umem->fq, filled);
- xsk_ring_prod__cancel(&umem->fq, buffers_to_fill - filled);
-
- pkt_stream_reset(pkt_stream);
- umem_reset_alloc(umem);
-}
-
-static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
-{
- u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size;
- int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
- LIBBPF_OPTS(bpf_xdp_query_opts, opts);
- void *bufs;
- int ret;
- u32 i;
-
- if (ifobject->umem->unaligned_mode)
- mmap_flags |= MAP_HUGETLB | MAP_HUGE_2MB;
-
- if (ifobject->shared_umem)
- umem_sz *= 2;
-
- bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
- if (bufs == MAP_FAILED)
- exit_with_error(errno);
-
- ret = xsk_configure_umem(ifobject, ifobject->umem, bufs, umem_sz);
- if (ret)
- exit_with_error(-ret);
-
- xsk_configure_socket(test, ifobject, ifobject->umem, false);
-
- ifobject->xsk = &ifobject->xsk_arr[0];
-
- if (!ifobject->rx_on)
- return;
-
- xsk_populate_fill_ring(ifobject->umem, ifobject->xsk->pkt_stream, ifobject->use_fill_ring);
-
- for (i = 0; i < test->nb_sockets; i++) {
- ifobject->xsk = &ifobject->xsk_arr[i];
- ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, i);
- if (ret)
- exit_with_error(errno);
- }
-}
-
-static void *worker_testapp_validate_tx(void *arg)
-{
- struct test_spec *test = (struct test_spec *)arg;
- struct ifobject *ifobject = test->ifobj_tx;
- int err;
-
- if (test->current_step == 1) {
- if (!ifobject->shared_umem)
- thread_common_ops(test, ifobject);
- else
- thread_common_ops_tx(test, ifobject);
- }
-
- err = send_pkts(test, ifobject);
-
- if (!err && ifobject->validation_func)
- err = ifobject->validation_func(ifobject);
- if (err)
- report_failure(test);
-
- pthread_exit(NULL);
-}
-
-static void *worker_testapp_validate_rx(void *arg)
-{
- struct test_spec *test = (struct test_spec *)arg;
- struct ifobject *ifobject = test->ifobj_rx;
- int err;
-
- if (test->current_step == 1) {
- thread_common_ops(test, ifobject);
- } else {
- xsk_clear_xskmap(ifobject->xskmap);
- err = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, 0);
- if (err) {
- ksft_print_msg("Error: Failed to update xskmap, error %s\n",
- strerror(-err));
- exit_with_error(-err);
- }
- }
-
- pthread_barrier_wait(&barr);
-
- err = receive_pkts(test);
-
- if (!err && ifobject->validation_func)
- err = ifobject->validation_func(ifobject);
-
- if (err) {
- if (test->adjust_tail && !is_adjust_tail_supported(ifobject->xdp_progs))
- test->adjust_tail_support = false;
- else
- report_failure(test);
- }
-
- pthread_exit(NULL);
-}
-
-static u64 ceil_u64(u64 a, u64 b)
-{
- return (a + b - 1) / b;
-}
-
-static void testapp_clean_xsk_umem(struct ifobject *ifobj)
-{
- u64 umem_sz = ifobj->umem->num_frames * ifobj->umem->frame_size;
-
- if (ifobj->shared_umem)
- umem_sz *= 2;
-
- umem_sz = ceil_u64(umem_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE;
- xsk_umem__delete(ifobj->umem->umem);
- munmap(ifobj->umem->buffer, umem_sz);
-}
-
-static void handler(int signum)
-{
- pthread_exit(NULL);
-}
-
-static bool xdp_prog_changed_rx(struct test_spec *test)
-{
- struct ifobject *ifobj = test->ifobj_rx;
-
- return ifobj->xdp_prog != test->xdp_prog_rx || ifobj->mode != test->mode;
-}
-
-static bool xdp_prog_changed_tx(struct test_spec *test)
-{
- struct ifobject *ifobj = test->ifobj_tx;
-
- return ifobj->xdp_prog != test->xdp_prog_tx || ifobj->mode != test->mode;
-}
-
-static void xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_prog,
- struct bpf_map *xskmap, enum test_mode mode)
-{
- int err;
-
- xsk_detach_xdp_program(ifobj->ifindex, mode_to_xdp_flags(ifobj->mode));
- err = xsk_attach_xdp_program(xdp_prog, ifobj->ifindex, mode_to_xdp_flags(mode));
- if (err) {
- ksft_print_msg("Error attaching XDP program\n");
- exit_with_error(-err);
- }
-
- if (ifobj->mode != mode && (mode == TEST_MODE_DRV || mode == TEST_MODE_ZC))
- if (!xsk_is_in_mode(ifobj->ifindex, XDP_FLAGS_DRV_MODE)) {
- ksft_print_msg("ERROR: XDP prog not in DRV mode\n");
- exit_with_error(EINVAL);
- }
-
- ifobj->xdp_prog = xdp_prog;
- ifobj->xskmap = xskmap;
- ifobj->mode = mode;
-}
-
-static void xsk_attach_xdp_progs(struct test_spec *test, struct ifobject *ifobj_rx,
- struct ifobject *ifobj_tx)
-{
- if (xdp_prog_changed_rx(test))
- xsk_reattach_xdp(ifobj_rx, test->xdp_prog_rx, test->xskmap_rx, test->mode);
-
- if (!ifobj_tx || ifobj_tx->shared_umem)
- return;
-
- if (xdp_prog_changed_tx(test))
- xsk_reattach_xdp(ifobj_tx, test->xdp_prog_tx, test->xskmap_tx, test->mode);
-}
-
-static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *ifobj1,
- struct ifobject *ifobj2)
-{
- pthread_t t0, t1;
- int err;
-
- if (test->mtu > MAX_ETH_PKT_SIZE) {
- if (test->mode == TEST_MODE_ZC && (!ifobj1->multi_buff_zc_supp ||
- (ifobj2 && !ifobj2->multi_buff_zc_supp))) {
- ksft_test_result_skip("Multi buffer for zero-copy not supported.\n");
- return TEST_SKIP;
- }
- if (test->mode != TEST_MODE_ZC && (!ifobj1->multi_buff_supp ||
- (ifobj2 && !ifobj2->multi_buff_supp))) {
- ksft_test_result_skip("Multi buffer not supported.\n");
- return TEST_SKIP;
- }
- }
- err = test_spec_set_mtu(test, test->mtu);
- if (err) {
- ksft_print_msg("Error, could not set mtu.\n");
- exit_with_error(err);
- }
-
- if (ifobj2) {
- if (pthread_barrier_init(&barr, NULL, 2))
- exit_with_error(errno);
- pkt_stream_reset(ifobj2->xsk->pkt_stream);
- }
-
- test->current_step++;
- pkt_stream_reset(ifobj1->xsk->pkt_stream);
- pkts_in_flight = 0;
-
- signal(SIGUSR1, handler);
- /*Spawn RX thread */
- pthread_create(&t0, NULL, ifobj1->func_ptr, test);
-
- if (ifobj2) {
- pthread_barrier_wait(&barr);
- if (pthread_barrier_destroy(&barr))
- exit_with_error(errno);
-
- /*Spawn TX thread */
- pthread_create(&t1, NULL, ifobj2->func_ptr, test);
-
- pthread_join(t1, NULL);
- }
-
- if (!ifobj2)
- pthread_kill(t0, SIGUSR1);
- else
- pthread_join(t0, NULL);
-
- if (test->total_steps == test->current_step || test->fail) {
- u32 i;
-
- if (ifobj2)
- for (i = 0; i < test->nb_sockets; i++)
- xsk_socket__delete(ifobj2->xsk_arr[i].xsk);
-
- for (i = 0; i < test->nb_sockets; i++)
- xsk_socket__delete(ifobj1->xsk_arr[i].xsk);
-
- testapp_clean_xsk_umem(ifobj1);
- if (ifobj2 && !ifobj2->shared_umem)
- testapp_clean_xsk_umem(ifobj2);
- }
-
- return !!test->fail;
-}
-
-static int testapp_validate_traffic(struct test_spec *test)
-{
- struct ifobject *ifobj_rx = test->ifobj_rx;
- struct ifobject *ifobj_tx = test->ifobj_tx;
-
- if ((ifobj_rx->umem->unaligned_mode && !ifobj_rx->unaligned_supp) ||
- (ifobj_tx->umem->unaligned_mode && !ifobj_tx->unaligned_supp)) {
- ksft_test_result_skip("No huge pages present.\n");
- return TEST_SKIP;
- }
-
- if (test->set_ring) {
- if (ifobj_tx->hw_ring_size_supp) {
- if (set_ring_size(ifobj_tx)) {
- ksft_test_result_skip("Failed to change HW ring size.\n");
- return TEST_FAILURE;
- }
- } else {
- ksft_test_result_skip("Changing HW ring size not supported.\n");
- return TEST_SKIP;
- }
- }
-
- xsk_attach_xdp_progs(test, ifobj_rx, ifobj_tx);
- return __testapp_validate_traffic(test, ifobj_rx, ifobj_tx);
-}
-
-static int testapp_validate_traffic_single_thread(struct test_spec *test, struct ifobject *ifobj)
-{
- return __testapp_validate_traffic(test, ifobj, NULL);
-}
-
-static int testapp_teardown(struct test_spec *test)
-{
- int i;
-
- for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
- if (testapp_validate_traffic(test))
- return TEST_FAILURE;
- test_spec_reset(test);
- }
-
- return TEST_PASS;
-}
-
-static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2)
-{
- thread_func_t tmp_func_ptr = (*ifobj1)->func_ptr;
- struct ifobject *tmp_ifobj = (*ifobj1);
-
- (*ifobj1)->func_ptr = (*ifobj2)->func_ptr;
- (*ifobj2)->func_ptr = tmp_func_ptr;
-
- *ifobj1 = *ifobj2;
- *ifobj2 = tmp_ifobj;
-}
-
-static int testapp_bidirectional(struct test_spec *test)
-{
- int res;
-
- test->ifobj_tx->rx_on = true;
- test->ifobj_rx->tx_on = true;
- test->total_steps = 2;
- if (testapp_validate_traffic(test))
- return TEST_FAILURE;
-
- print_verbose("Switching Tx/Rx direction\n");
- swap_directions(&test->ifobj_rx, &test->ifobj_tx);
- res = __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx);
-
- swap_directions(&test->ifobj_rx, &test->ifobj_tx);
- return res;
-}
-
-static int swap_xsk_resources(struct test_spec *test)
-{
- int ret;
-
- test->ifobj_tx->xsk_arr[0].pkt_stream = NULL;
- test->ifobj_rx->xsk_arr[0].pkt_stream = NULL;
- test->ifobj_tx->xsk_arr[1].pkt_stream = test->tx_pkt_stream_default;
- test->ifobj_rx->xsk_arr[1].pkt_stream = test->rx_pkt_stream_default;
- test->ifobj_tx->xsk = &test->ifobj_tx->xsk_arr[1];
- test->ifobj_rx->xsk = &test->ifobj_rx->xsk_arr[1];
-
- ret = xsk_update_xskmap(test->ifobj_rx->xskmap, test->ifobj_rx->xsk->xsk, 0);
- if (ret)
- return TEST_FAILURE;
-
- return TEST_PASS;
-}
-
-static int testapp_xdp_prog_cleanup(struct test_spec *test)
-{
- test->total_steps = 2;
- test->nb_sockets = 2;
- if (testapp_validate_traffic(test))
- return TEST_FAILURE;
-
- if (swap_xsk_resources(test))
- return TEST_FAILURE;
- return testapp_validate_traffic(test);
-}
-
-static int testapp_headroom(struct test_spec *test)
-{
- test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE;
- return testapp_validate_traffic(test);
-}
-
-static int testapp_stats_rx_dropped(struct test_spec *test)
-{
- if (test->mode == TEST_MODE_ZC) {
- ksft_test_result_skip("Can not run RX_DROPPED test for ZC mode\n");
- return TEST_SKIP;
- }
-
- pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0);
- test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size -
- XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3;
- pkt_stream_receive_half(test);
- test->ifobj_rx->validation_func = validate_rx_dropped;
- return testapp_validate_traffic(test);
-}
-
-static int testapp_stats_tx_invalid_descs(struct test_spec *test)
-{
- pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0);
- test->ifobj_tx->validation_func = validate_tx_invalid_descs;
- return testapp_validate_traffic(test);
-}
-
-static int testapp_stats_rx_full(struct test_spec *test)
-{
- pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE);
- test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
-
- test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS;
- test->ifobj_rx->release_rx = false;
- test->ifobj_rx->validation_func = validate_rx_full;
- return testapp_validate_traffic(test);
-}
-
-static int testapp_stats_fill_empty(struct test_spec *test)
-{
- pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE);
- test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
-
- test->ifobj_rx->use_fill_ring = false;
- test->ifobj_rx->validation_func = validate_fill_empty;
- return testapp_validate_traffic(test);
-}
-
-static int testapp_send_receive_unaligned(struct test_spec *test)
-{
- test->ifobj_tx->umem->unaligned_mode = true;
- test->ifobj_rx->umem->unaligned_mode = true;
- /* Let half of the packets straddle a 4K buffer boundary */
- pkt_stream_replace_half(test, MIN_PKT_SIZE, -MIN_PKT_SIZE / 2);
-
- return testapp_validate_traffic(test);
-}
-
-static int testapp_send_receive_unaligned_mb(struct test_spec *test)
-{
- test->mtu = MAX_ETH_JUMBO_SIZE;
- test->ifobj_tx->umem->unaligned_mode = true;
- test->ifobj_rx->umem->unaligned_mode = true;
- pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE);
- return testapp_validate_traffic(test);
-}
-
-static int testapp_single_pkt(struct test_spec *test)
-{
- struct pkt pkts[] = {{0, MIN_PKT_SIZE, 0, true}};
-
- pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
- return testapp_validate_traffic(test);
-}
-
-static int testapp_send_receive_mb(struct test_spec *test)
-{
- test->mtu = MAX_ETH_JUMBO_SIZE;
- pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE);
-
- return testapp_validate_traffic(test);
-}
-
-static int testapp_invalid_desc_mb(struct test_spec *test)
-{
- struct xsk_umem_info *umem = test->ifobj_tx->umem;
- u64 umem_size = umem->num_frames * umem->frame_size;
- struct pkt pkts[] = {
- /* Valid packet for synch to start with */
- {0, MIN_PKT_SIZE, 0, true, 0},
- /* Zero frame len is not legal */
- {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
- {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
- {0, 0, 0, false, 0},
- /* Invalid address in the second frame */
- {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
- {umem_size, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
- /* Invalid len in the middle */
- {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
- {0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
- /* Invalid options in the middle */
- {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
- {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XSK_DESC__INVALID_OPTION},
- /* Transmit 2 frags, receive 3 */
- {0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, XDP_PKT_CONTD},
- {0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, 0},
- /* Middle frame crosses chunk boundary with small length */
- {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
- {-MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false, 0},
- /* Valid packet for synch so that something is received */
- {0, MIN_PKT_SIZE, 0, true, 0}};
-
- if (umem->unaligned_mode) {
- /* Crossing a chunk boundary allowed */
- pkts[12].valid = true;
- pkts[13].valid = true;
- }
-
- test->mtu = MAX_ETH_JUMBO_SIZE;
- pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
- return testapp_validate_traffic(test);
-}
-
-static int testapp_invalid_desc(struct test_spec *test)
-{
- struct xsk_umem_info *umem = test->ifobj_tx->umem;
- u64 umem_size = umem->num_frames * umem->frame_size;
- struct pkt pkts[] = {
- /* Zero packet address allowed */
- {0, MIN_PKT_SIZE, 0, true},
- /* Allowed packet */
- {0, MIN_PKT_SIZE, 0, true},
- /* Straddling the start of umem */
- {-2, MIN_PKT_SIZE, 0, false},
- /* Packet too large */
- {0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false},
- /* Up to end of umem allowed */
- {umem_size - MIN_PKT_SIZE - 2 * umem->frame_size, MIN_PKT_SIZE, 0, true},
- /* After umem ends */
- {umem_size, MIN_PKT_SIZE, 0, false},
- /* Straddle the end of umem */
- {umem_size - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false},
- /* Straddle a 4K boundary */
- {0x1000 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false},
- /* Straddle a 2K boundary */
- {0x800 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, true},
- /* Valid packet for synch so that something is received */
- {0, MIN_PKT_SIZE, 0, true}};
-
- if (umem->unaligned_mode) {
- /* Crossing a page boundary allowed */
- pkts[7].valid = true;
- }
- if (umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) {
- /* Crossing a 2K frame size boundary not allowed */
- pkts[8].valid = false;
- }
-
- if (test->ifobj_tx->shared_umem) {
- pkts[4].offset += umem_size;
- pkts[5].offset += umem_size;
- pkts[6].offset += umem_size;
- }
-
- pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
- return testapp_validate_traffic(test);
-}
-
-static int testapp_xdp_drop(struct test_spec *test)
-{
- struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
- struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
-
- test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_drop, skel_tx->progs.xsk_xdp_drop,
- skel_rx->maps.xsk, skel_tx->maps.xsk);
-
- pkt_stream_receive_half(test);
- return testapp_validate_traffic(test);
-}
-
-static int testapp_xdp_metadata_copy(struct test_spec *test)
-{
- struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
- struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
-
- test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_populate_metadata,
- skel_tx->progs.xsk_xdp_populate_metadata,
- skel_rx->maps.xsk, skel_tx->maps.xsk);
- test->ifobj_rx->use_metadata = true;
-
- skel_rx->bss->count = 0;
-
- return testapp_validate_traffic(test);
-}
-
-static int testapp_xdp_shared_umem(struct test_spec *test)
-{
- struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
- struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
-
- test->total_steps = 1;
- test->nb_sockets = 2;
-
- test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_shared_umem,
- skel_tx->progs.xsk_xdp_shared_umem,
- skel_rx->maps.xsk, skel_tx->maps.xsk);
-
- pkt_stream_even_odd_sequence(test);
-
- return testapp_validate_traffic(test);
-}
-
-static int testapp_poll_txq_tmout(struct test_spec *test)
-{
- test->ifobj_tx->use_poll = true;
- /* create invalid frame by set umem frame_size and pkt length equal to 2048 */
- test->ifobj_tx->umem->frame_size = 2048;
- pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048);
- return testapp_validate_traffic_single_thread(test, test->ifobj_tx);
-}
-
-static int testapp_poll_rxq_tmout(struct test_spec *test)
-{
- test->ifobj_rx->use_poll = true;
- return testapp_validate_traffic_single_thread(test, test->ifobj_rx);
-}
-
-static int testapp_too_many_frags(struct test_spec *test)
-{
- struct pkt *pkts;
- u32 max_frags, i;
- int ret;
-
- if (test->mode == TEST_MODE_ZC) {
- max_frags = test->ifobj_tx->xdp_zc_max_segs;
- } else {
- max_frags = get_max_skb_frags();
- if (!max_frags) {
- ksft_print_msg("Couldn't retrieve MAX_SKB_FRAGS from system, using default (17) value\n");
- max_frags = 17;
- }
- max_frags += 1;
- }
-
- pkts = calloc(2 * max_frags + 2, sizeof(struct pkt));
- if (!pkts)
- return TEST_FAILURE;
-
- test->mtu = MAX_ETH_JUMBO_SIZE;
-
- /* Valid packet for synch */
- pkts[0].len = MIN_PKT_SIZE;
- pkts[0].valid = true;
-
- /* One valid packet with the max amount of frags */
- for (i = 1; i < max_frags + 1; i++) {
- pkts[i].len = MIN_PKT_SIZE;
- pkts[i].options = XDP_PKT_CONTD;
- pkts[i].valid = true;
- }
- pkts[max_frags].options = 0;
-
- /* An invalid packet with the max amount of frags but signals packet
- * continues on the last frag
- */
- for (i = max_frags + 1; i < 2 * max_frags + 1; i++) {
- pkts[i].len = MIN_PKT_SIZE;
- pkts[i].options = XDP_PKT_CONTD;
- pkts[i].valid = false;
- }
-
- /* Valid packet for synch */
- pkts[2 * max_frags + 1].len = MIN_PKT_SIZE;
- pkts[2 * max_frags + 1].valid = true;
-
- pkt_stream_generate_custom(test, pkts, 2 * max_frags + 2);
- ret = testapp_validate_traffic(test);
-
- free(pkts);
- return ret;
-}
-
-static int xsk_load_xdp_programs(struct ifobject *ifobj)
-{
- ifobj->xdp_progs = xsk_xdp_progs__open_and_load();
- if (libbpf_get_error(ifobj->xdp_progs))
- return libbpf_get_error(ifobj->xdp_progs);
-
- return 0;
-}
-
-static void xsk_unload_xdp_programs(struct ifobject *ifobj)
-{
- xsk_xdp_progs__destroy(ifobj->xdp_progs);
-}
-
-/* Simple test */
-static bool hugepages_present(void)
-{
- size_t mmap_sz = 2 * DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE;
- void *bufs;
-
- bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, MAP_HUGE_2MB);
- if (bufs == MAP_FAILED)
- return false;
-
- mmap_sz = ceil_u64(mmap_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE;
- munmap(bufs, mmap_sz);
- return true;
-}
-
-static void init_iface(struct ifobject *ifobj, thread_func_t func_ptr)
-{
- LIBBPF_OPTS(bpf_xdp_query_opts, query_opts);
- int err;
-
- ifobj->func_ptr = func_ptr;
-
- err = xsk_load_xdp_programs(ifobj);
- if (err) {
- ksft_print_msg("Error loading XDP program\n");
- exit_with_error(err);
- }
-
- if (hugepages_present())
- ifobj->unaligned_supp = true;
-
- err = bpf_xdp_query(ifobj->ifindex, XDP_FLAGS_DRV_MODE, &query_opts);
- if (err) {
- ksft_print_msg("Error querying XDP capabilities\n");
- exit_with_error(-err);
- }
- if (query_opts.feature_flags & NETDEV_XDP_ACT_RX_SG)
- ifobj->multi_buff_supp = true;
- if (query_opts.feature_flags & NETDEV_XDP_ACT_XSK_ZEROCOPY) {
- if (query_opts.xdp_zc_max_segs > 1) {
- ifobj->multi_buff_zc_supp = true;
- ifobj->xdp_zc_max_segs = query_opts.xdp_zc_max_segs;
- } else {
- ifobj->xdp_zc_max_segs = 0;
- }
- }
-}
-
-static int testapp_send_receive(struct test_spec *test)
-{
- return testapp_validate_traffic(test);
-}
-
-static int testapp_send_receive_2k_frame(struct test_spec *test)
-{
- test->ifobj_tx->umem->frame_size = 2048;
- test->ifobj_rx->umem->frame_size = 2048;
- pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE);
- return testapp_validate_traffic(test);
-}
-
-static int testapp_poll_rx(struct test_spec *test)
-{
- test->ifobj_rx->use_poll = true;
- return testapp_validate_traffic(test);
-}
-
-static int testapp_poll_tx(struct test_spec *test)
-{
- test->ifobj_tx->use_poll = true;
- return testapp_validate_traffic(test);
-}
-
-static int testapp_aligned_inv_desc(struct test_spec *test)
-{
- return testapp_invalid_desc(test);
-}
-
-static int testapp_aligned_inv_desc_2k_frame(struct test_spec *test)
-{
- test->ifobj_tx->umem->frame_size = 2048;
- test->ifobj_rx->umem->frame_size = 2048;
- return testapp_invalid_desc(test);
-}
-
-static int testapp_unaligned_inv_desc(struct test_spec *test)
-{
- test->ifobj_tx->umem->unaligned_mode = true;
- test->ifobj_rx->umem->unaligned_mode = true;
- return testapp_invalid_desc(test);
-}
-
-static int testapp_unaligned_inv_desc_4001_frame(struct test_spec *test)
-{
- u64 page_size, umem_size;
-
- /* Odd frame size so the UMEM doesn't end near a page boundary. */
- test->ifobj_tx->umem->frame_size = 4001;
- test->ifobj_rx->umem->frame_size = 4001;
- test->ifobj_tx->umem->unaligned_mode = true;
- test->ifobj_rx->umem->unaligned_mode = true;
- /* This test exists to test descriptors that staddle the end of
- * the UMEM but not a page.
- */
- page_size = sysconf(_SC_PAGESIZE);
- umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size;
- assert(umem_size % page_size > MIN_PKT_SIZE);
- assert(umem_size % page_size < page_size - MIN_PKT_SIZE);
-
- return testapp_invalid_desc(test);
-}
-
-static int testapp_aligned_inv_desc_mb(struct test_spec *test)
-{
- return testapp_invalid_desc_mb(test);
-}
-
-static int testapp_unaligned_inv_desc_mb(struct test_spec *test)
-{
- test->ifobj_tx->umem->unaligned_mode = true;
- test->ifobj_rx->umem->unaligned_mode = true;
- return testapp_invalid_desc_mb(test);
-}
-
-static int testapp_xdp_metadata(struct test_spec *test)
-{
- return testapp_xdp_metadata_copy(test);
-}
-
-static int testapp_xdp_metadata_mb(struct test_spec *test)
-{
- test->mtu = MAX_ETH_JUMBO_SIZE;
- return testapp_xdp_metadata_copy(test);
-}
-
-static int testapp_hw_sw_min_ring_size(struct test_spec *test)
-{
- int ret;
-
- test->set_ring = true;
- test->total_steps = 2;
- test->ifobj_tx->ring.tx_pending = DEFAULT_BATCH_SIZE;
- test->ifobj_tx->ring.rx_pending = DEFAULT_BATCH_SIZE * 2;
- test->ifobj_tx->xsk->batch_size = 1;
- test->ifobj_rx->xsk->batch_size = 1;
- ret = testapp_validate_traffic(test);
- if (ret)
- return ret;
-
- /* Set batch size to hw_ring_size - 1 */
- test->ifobj_tx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1;
- test->ifobj_rx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1;
- return testapp_validate_traffic(test);
-}
-
-static int testapp_hw_sw_max_ring_size(struct test_spec *test)
-{
- u32 max_descs = XSK_RING_PROD__DEFAULT_NUM_DESCS * 4;
- int ret;
-
- test->set_ring = true;
- test->total_steps = 2;
- test->ifobj_tx->ring.tx_pending = test->ifobj_tx->ring.tx_max_pending;
- test->ifobj_tx->ring.rx_pending = test->ifobj_tx->ring.rx_max_pending;
- test->ifobj_rx->umem->num_frames = max_descs;
- test->ifobj_rx->umem->fill_size = max_descs;
- test->ifobj_rx->umem->comp_size = max_descs;
- test->ifobj_tx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
- test->ifobj_rx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
-
- ret = testapp_validate_traffic(test);
- if (ret)
- return ret;
-
- /* Set batch_size to 8152 for testing, as the ice HW ignores the 3 lowest bits when
- * updating the Rx HW tail register.
- */
- test->ifobj_tx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8;
- test->ifobj_rx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8;
- pkt_stream_replace(test, max_descs, MIN_PKT_SIZE);
- return testapp_validate_traffic(test);
-}
-
-static int testapp_xdp_adjust_tail(struct test_spec *test, int adjust_value)
-{
- struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
- struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
-
- test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_adjust_tail,
- skel_tx->progs.xsk_xdp_adjust_tail,
- skel_rx->maps.xsk, skel_tx->maps.xsk);
-
- skel_rx->bss->adjust_value = adjust_value;
-
- return testapp_validate_traffic(test);
-}
-
-static int testapp_adjust_tail(struct test_spec *test, u32 value, u32 pkt_len)
-{
- int ret;
-
- test->adjust_tail_support = true;
- test->adjust_tail = true;
- test->total_steps = 1;
-
- pkt_stream_replace_ifobject(test->ifobj_tx, DEFAULT_BATCH_SIZE, pkt_len);
- pkt_stream_replace_ifobject(test->ifobj_rx, DEFAULT_BATCH_SIZE, pkt_len + value);
-
- ret = testapp_xdp_adjust_tail(test, value);
- if (ret)
- return ret;
-
- if (!test->adjust_tail_support) {
- ksft_test_result_skip("%s %sResize pkt with bpf_xdp_adjust_tail() not supported\n",
- mode_string(test), busy_poll_string(test));
- return TEST_SKIP;
- }
-
- return 0;
-}
-
-static int testapp_adjust_tail_shrink(struct test_spec *test)
-{
- /* Shrink by 4 bytes for testing purpose */
- return testapp_adjust_tail(test, -4, MIN_PKT_SIZE * 2);
-}
-
-static int testapp_adjust_tail_shrink_mb(struct test_spec *test)
-{
- test->mtu = MAX_ETH_JUMBO_SIZE;
- /* Shrink by the frag size */
- return testapp_adjust_tail(test, -XSK_UMEM__MAX_FRAME_SIZE, XSK_UMEM__LARGE_FRAME_SIZE * 2);
-}
-
-static int testapp_adjust_tail_grow(struct test_spec *test)
-{
- /* Grow by 4 bytes for testing purpose */
- return testapp_adjust_tail(test, 4, MIN_PKT_SIZE * 2);
-}
-
-static int testapp_adjust_tail_grow_mb(struct test_spec *test)
-{
- test->mtu = MAX_ETH_JUMBO_SIZE;
- /* Grow by (frag_size - last_frag_Size) - 1 to stay inside the last fragment */
- return testapp_adjust_tail(test, (XSK_UMEM__MAX_FRAME_SIZE / 2) - 1,
- XSK_UMEM__LARGE_FRAME_SIZE * 2);
-}
-
-static int testapp_tx_queue_consumer(struct test_spec *test)
-{
- int nr_packets;
-
- if (test->mode == TEST_MODE_ZC) {
- ksft_test_result_skip("Can not run TX_QUEUE_CONSUMER test for ZC mode\n");
- return TEST_SKIP;
- }
-
- nr_packets = MAX_TX_BUDGET_DEFAULT + 1;
- pkt_stream_replace(test, nr_packets, MIN_PKT_SIZE);
- test->ifobj_tx->xsk->batch_size = nr_packets;
- test->ifobj_tx->xsk->check_consumer = true;
-
- return testapp_validate_traffic(test);
-}
-
-static void run_pkt_test(struct test_spec *test)
-{
- int ret;
-
- ret = test->test_func(test);
-
- if (ret == TEST_PASS)
- ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test),
- test->name);
- pkt_stream_restore_default(test);
-}
-
-static struct ifobject *ifobject_create(void)
-{
- struct ifobject *ifobj;
-
- ifobj = calloc(1, sizeof(struct ifobject));
- if (!ifobj)
- return NULL;
-
- ifobj->xsk_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->xsk_arr));
- if (!ifobj->xsk_arr)
- goto out_xsk_arr;
-
- ifobj->umem = calloc(1, sizeof(*ifobj->umem));
- if (!ifobj->umem)
- goto out_umem;
-
- return ifobj;
-
-out_umem:
- free(ifobj->xsk_arr);
-out_xsk_arr:
- free(ifobj);
- return NULL;
-}
-
-static void ifobject_delete(struct ifobject *ifobj)
-{
- free(ifobj->umem);
- free(ifobj->xsk_arr);
- free(ifobj);
-}
-
-static bool is_xdp_supported(int ifindex)
-{
- int flags = XDP_FLAGS_DRV_MODE;
-
- LIBBPF_OPTS(bpf_link_create_opts, opts, .flags = flags);
- struct bpf_insn insns[2] = {
- BPF_MOV64_IMM(BPF_REG_0, XDP_PASS),
- BPF_EXIT_INSN()
- };
- int prog_fd, insn_cnt = ARRAY_SIZE(insns);
- int err;
-
- prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL);
- if (prog_fd < 0)
- return false;
-
- err = bpf_xdp_attach(ifindex, prog_fd, flags, NULL);
- if (err) {
- close(prog_fd);
- return false;
- }
-
- bpf_xdp_detach(ifindex, flags, NULL);
- close(prog_fd);
-
- return true;
-}
-
-static const struct test_spec tests[] = {
- {.name = "SEND_RECEIVE", .test_func = testapp_send_receive},
- {.name = "SEND_RECEIVE_2K_FRAME", .test_func = testapp_send_receive_2k_frame},
- {.name = "SEND_RECEIVE_SINGLE_PKT", .test_func = testapp_single_pkt},
- {.name = "POLL_RX", .test_func = testapp_poll_rx},
- {.name = "POLL_TX", .test_func = testapp_poll_tx},
- {.name = "POLL_RXQ_FULL", .test_func = testapp_poll_rxq_tmout},
- {.name = "POLL_TXQ_FULL", .test_func = testapp_poll_txq_tmout},
- {.name = "SEND_RECEIVE_UNALIGNED", .test_func = testapp_send_receive_unaligned},
- {.name = "ALIGNED_INV_DESC", .test_func = testapp_aligned_inv_desc},
- {.name = "ALIGNED_INV_DESC_2K_FRAME_SIZE", .test_func = testapp_aligned_inv_desc_2k_frame},
- {.name = "UNALIGNED_INV_DESC", .test_func = testapp_unaligned_inv_desc},
- {.name = "UNALIGNED_INV_DESC_4001_FRAME_SIZE",
- .test_func = testapp_unaligned_inv_desc_4001_frame},
- {.name = "UMEM_HEADROOM", .test_func = testapp_headroom},
- {.name = "TEARDOWN", .test_func = testapp_teardown},
- {.name = "BIDIRECTIONAL", .test_func = testapp_bidirectional},
- {.name = "STAT_RX_DROPPED", .test_func = testapp_stats_rx_dropped},
- {.name = "STAT_TX_INVALID", .test_func = testapp_stats_tx_invalid_descs},
- {.name = "STAT_RX_FULL", .test_func = testapp_stats_rx_full},
- {.name = "STAT_FILL_EMPTY", .test_func = testapp_stats_fill_empty},
- {.name = "XDP_PROG_CLEANUP", .test_func = testapp_xdp_prog_cleanup},
- {.name = "XDP_DROP_HALF", .test_func = testapp_xdp_drop},
- {.name = "XDP_SHARED_UMEM", .test_func = testapp_xdp_shared_umem},
- {.name = "XDP_METADATA_COPY", .test_func = testapp_xdp_metadata},
- {.name = "XDP_METADATA_COPY_MULTI_BUFF", .test_func = testapp_xdp_metadata_mb},
- {.name = "SEND_RECEIVE_9K_PACKETS", .test_func = testapp_send_receive_mb},
- {.name = "SEND_RECEIVE_UNALIGNED_9K_PACKETS",
- .test_func = testapp_send_receive_unaligned_mb},
- {.name = "ALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_aligned_inv_desc_mb},
- {.name = "UNALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_unaligned_inv_desc_mb},
- {.name = "TOO_MANY_FRAGS", .test_func = testapp_too_many_frags},
- {.name = "HW_SW_MIN_RING_SIZE", .test_func = testapp_hw_sw_min_ring_size},
- {.name = "HW_SW_MAX_RING_SIZE", .test_func = testapp_hw_sw_max_ring_size},
- {.name = "XDP_ADJUST_TAIL_SHRINK", .test_func = testapp_adjust_tail_shrink},
- {.name = "XDP_ADJUST_TAIL_SHRINK_MULTI_BUFF", .test_func = testapp_adjust_tail_shrink_mb},
- {.name = "XDP_ADJUST_TAIL_GROW", .test_func = testapp_adjust_tail_grow},
- {.name = "XDP_ADJUST_TAIL_GROW_MULTI_BUFF", .test_func = testapp_adjust_tail_grow_mb},
- {.name = "TX_QUEUE_CONSUMER", .test_func = testapp_tx_queue_consumer},
- };
-
static void print_tests(void)
{
u32 i;
@@ -2833,7 +376,7 @@ int main(int argc, char **argv)
init_iface(ifobj_rx, worker_testapp_validate_rx);
init_iface(ifobj_tx, worker_testapp_validate_tx);

- test_spec_init(&test, ifobj_tx, ifobj_rx, 0, &tests[0]);
+ test_init(&test, ifobj_tx, ifobj_rx, 0, &tests[0]);
tx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE);
rx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE);
if (!tx_pkt_stream_default || !rx_pkt_stream_default)
@@ -2868,7 +411,7 @@ int main(int argc, char **argv)
if (opt_run_test != RUN_ALL_TESTS && j != opt_run_test)
continue;

- test_spec_init(&test, ifobj_tx, ifobj_rx, i, &tests[j]);
+ test_init(&test, ifobj_tx, ifobj_rx, i, &tests[j]);
run_pkt_test(&test);
usleep(USLEEP_MAX);

diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index 4df3a5d329ac..3ca518df23ad 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -22,169 +22,13 @@
#define PF_XDP AF_XDP
#endif

-#ifndef SO_BUSY_POLL_BUDGET
-#define SO_BUSY_POLL_BUDGET 70
-#endif
-
-#ifndef SO_PREFER_BUSY_POLL
-#define SO_PREFER_BUSY_POLL 69
-#endif
-
-#define TEST_PASS 0
-#define TEST_FAILURE -1
-#define TEST_CONTINUE 1
-#define TEST_SKIP 2
-#define MAX_INTERFACES 2
-#define MAX_INTERFACE_NAME_CHARS 16
-#define MAX_TEST_NAME_SIZE 48
#define MAX_TEARDOWN_ITER 10
-#define PKT_HDR_SIZE (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */
-#define MIN_PKT_SIZE 64
-#define MAX_ETH_PKT_SIZE 1518
#define MAX_ETH_JUMBO_SIZE 9000
-#define USLEEP_MAX 10000
#define SOCK_RECONF_CTR 10
-#define DEFAULT_BATCH_SIZE 64
-#define POLL_TMOUT 1000
-#define THREAD_TMOUT 3
-#define DEFAULT_PKT_CNT (4 * 1024)
-#define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4)
#define RX_FULL_RXQSIZE 32
#define UMEM_HEADROOM_TEST_SIZE 128
#define XSK_UMEM__INVALID_FRAME_SIZE (MAX_ETH_JUMBO_SIZE + 1)
-#define XSK_UMEM__LARGE_FRAME_SIZE (3 * 1024)
-#define XSK_UMEM__MAX_FRAME_SIZE (4 * 1024)
-#define XSK_DESC__INVALID_OPTION (0xffff)
-#define HUGEPAGE_SIZE (2 * 1024 * 1024)
-#define PKT_DUMP_NB_TO_PRINT 16
#define RUN_ALL_TESTS UINT_MAX
#define NUM_MAC_ADDRESSES 4

-#define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
-
-enum test_mode {
- TEST_MODE_SKB,
- TEST_MODE_DRV,
- TEST_MODE_ZC,
- TEST_MODE_ALL
-};
-
-struct xsk_umem_info {
- struct xsk_ring_prod fq;
- struct xsk_ring_cons cq;
- struct xsk_umem *umem;
- u64 next_buffer;
- u32 num_frames;
- u32 frame_headroom;
- void *buffer;
- u32 frame_size;
- u32 base_addr;
- u32 fill_size;
- u32 comp_size;
- bool unaligned_mode;
-};
-
-struct xsk_socket_info {
- struct xsk_ring_cons rx;
- struct xsk_ring_prod tx;
- struct xsk_umem_info *umem;
- struct xsk_socket *xsk;
- struct pkt_stream *pkt_stream;
- u32 outstanding_tx;
- u32 rxqsize;
- u32 batch_size;
- u8 dst_mac[ETH_ALEN];
- u8 src_mac[ETH_ALEN];
- bool check_consumer;
-};
-
-struct pkt {
- int offset;
- u32 len;
- u32 pkt_nb;
- bool valid;
- u16 options;
-};
-
-struct pkt_stream {
- u32 nb_pkts;
- u32 current_pkt_nb;
- struct pkt *pkts;
- u32 max_pkt_len;
- u32 nb_rx_pkts;
- u32 nb_valid_entries;
- bool verbatim;
-};
-
-struct set_hw_ring {
- u32 default_tx;
- u32 default_rx;
-};
-
-struct ifobject;
-struct test_spec;
-typedef int (*validation_func_t)(struct ifobject *ifobj);
-typedef void *(*thread_func_t)(void *arg);
-typedef int (*test_func_t)(struct test_spec *test);
-
-struct ifobject {
- char ifname[MAX_INTERFACE_NAME_CHARS];
- struct xsk_socket_info *xsk;
- struct xsk_socket_info *xsk_arr;
- struct xsk_umem_info *umem;
- thread_func_t func_ptr;
- validation_func_t validation_func;
- struct xsk_xdp_progs *xdp_progs;
- struct bpf_map *xskmap;
- struct bpf_program *xdp_prog;
- struct ethtool_ringparam ring;
- struct set_hw_ring set_ring;
- enum test_mode mode;
- int ifindex;
- int mtu;
- u32 bind_flags;
- u32 xdp_zc_max_segs;
- bool tx_on;
- bool rx_on;
- bool use_poll;
- bool busy_poll;
- bool use_fill_ring;
- bool release_rx;
- bool shared_umem;
- bool use_metadata;
- bool unaligned_supp;
- bool multi_buff_supp;
- bool multi_buff_zc_supp;
- bool hw_ring_size_supp;
-};
-
-struct test_spec {
- struct ifobject *ifobj_tx;
- struct ifobject *ifobj_rx;
- struct pkt_stream *tx_pkt_stream_default;
- struct pkt_stream *rx_pkt_stream_default;
- struct bpf_program *xdp_prog_rx;
- struct bpf_program *xdp_prog_tx;
- struct bpf_map *xskmap_rx;
- struct bpf_map *xskmap_tx;
- test_func_t test_func;
- int mtu;
- u16 total_steps;
- u16 current_step;
- u16 nb_sockets;
- bool fail;
- bool set_ring;
- bool adjust_tail;
- bool adjust_tail_support;
- enum test_mode mode;
- char name[MAX_TEST_NAME_SIZE];
-};
-
-pthread_barrier_t barr;
-pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-int pkts_in_flight;
-
-static const u8 g_mac[ETH_ALEN] = {0x55, 0x44, 0x33, 0x22, 0x11, 0x00};
-
#endif /* XSKXCEIVER_H_ */
diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
index 87f89fd92f8c..64d3941576d5 100644
--- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
+++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
@@ -247,9 +247,9 @@ function listen_port_and_save_to() {
SOCAT_MODE="UDP6-LISTEN"
fi

- # Just wait for 2 seconds
- timeout 2 ip netns exec "${NAMESPACE}" \
- socat "${SOCAT_MODE}":"${PORT}",fork "${OUTPUT}"
+ # Just wait for 3 seconds
+ timeout 3 ip netns exec "${NAMESPACE}" \
+ socat "${SOCAT_MODE}":"${PORT}",fork "${OUTPUT}" 2> /dev/null
}

# Only validate that the message arrived properly
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
index 0441a18f098b..aac8ef490feb 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
@@ -317,7 +317,7 @@ police_limits_test()

tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
flower skip_sw \
- action police rate 0.5kbit burst 1m conform-exceed drop/ok
+ action police rate 0.5kbit burst 2k conform-exceed drop/ok
check_fail $? "Incorrect success to add police action with too low rate"

tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
@@ -327,7 +327,7 @@ police_limits_test()

tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
flower skip_sw \
- action police rate 1.5kbit burst 1m conform-exceed drop/ok
+ action police rate 1.5kbit burst 2k conform-exceed drop/ok
check_err $? "Failed to add police action with low rate"

tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index 5b993924cc3f..2ca07ea7202a 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -18,6 +18,9 @@
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/wait.h>
+#include <sys/types.h>
+#include <sys/ipc.h>
+#include <sys/sem.h>
#include <unistd.h>
#include <ctype.h>

@@ -39,6 +42,20 @@
F_SEAL_EXEC)

#define MFD_NOEXEC_SEAL 0x0008U
+union semun {
+ int val;
+ struct semid_ds *buf;
+ unsigned short int *array;
+ struct seminfo *__buf;
+};
+
+/*
+ * we use semaphores on nested wait tasks due the use of CLONE_NEWPID: the
+ * child will be PID 1 and can't send SIGSTOP to themselves due special
+ * treatment of the init task, so the SIGSTOP/SIGCONT synchronization
+ * approach can't be used here.
+ */
+#define SEM_KEY 0xdeadbeef

/*
* Default is not to test hugetlbfs
@@ -1333,8 +1350,22 @@ static int sysctl_nested(void *arg)

static int sysctl_nested_wait(void *arg)
{
- /* Wait for a SIGCONT. */
- kill(getpid(), SIGSTOP);
+ int sem = semget(SEM_KEY, 1, 0600);
+ struct sembuf sembuf;
+
+ if (sem < 0) {
+ perror("semget:");
+ abort();
+ }
+ sembuf.sem_num = 0;
+ sembuf.sem_flg = 0;
+ sembuf.sem_op = 0;
+
+ if (semop(sem, &sembuf, 1) < 0) {
+ perror("semop:");
+ abort();
+ }
+
return sysctl_nested(arg);
}

@@ -1355,7 +1386,9 @@ static void test_sysctl_sysctl2_failset(void)

static int sysctl_nested_child(void *arg)
{
- int pid;
+ int pid, sem;
+ union semun semun;
+ struct sembuf sembuf;

printf("%s nested sysctl 0\n", memfd_str);
sysctl_assert_write("0");
@@ -1389,23 +1422,53 @@ static int sysctl_nested_child(void *arg)
test_sysctl_sysctl2_failset);
join_thread(pid);

+ sem = semget(SEM_KEY, 1, IPC_CREAT | 0600);
+ if (sem < 0) {
+ perror("semget:");
+ return 1;
+ }
+ semun.val = 1;
+ sembuf.sem_op = -1;
+ sembuf.sem_flg = 0;
+ sembuf.sem_num = 0;
+
/* Verify that the rules are actually inherited after fork. */
printf("%s nested sysctl 0 -> 1 after fork\n", memfd_str);
sysctl_assert_write("0");

+ if (semctl(sem, 0, SETVAL, semun) < 0) {
+ perror("semctl:");
+ return 1;
+ }
+
pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
test_sysctl_sysctl1_failset);
sysctl_assert_write("1");
- kill(pid, SIGCONT);
+
+ /* Allow child to continue */
+ if (semop(sem, &sembuf, 1) < 0) {
+ perror("semop:");
+ return 1;
+ }
join_thread(pid);

printf("%s nested sysctl 0 -> 2 after fork\n", memfd_str);
sysctl_assert_write("0");

+ if (semctl(sem, 0, SETVAL, semun) < 0) {
+ perror("semctl:");
+ return 1;
+ }
+
pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
test_sysctl_sysctl2_failset);
sysctl_assert_write("2");
- kill(pid, SIGCONT);
+
+ /* Allow child to continue */
+ if (semop(sem, &sembuf, 1) < 0) {
+ perror("semop:");
+ return 1;
+ }
join_thread(pid);

/*
@@ -1415,28 +1478,62 @@ static int sysctl_nested_child(void *arg)
*/
printf("%s nested sysctl 2 -> 1 after fork\n", memfd_str);
sysctl_assert_write("2");
+
+ if (semctl(sem, 0, SETVAL, semun) < 0) {
+ perror("semctl:");
+ return 1;
+ }
+
pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
test_sysctl_sysctl2);
sysctl_assert_write("1");
- kill(pid, SIGCONT);
+
+ /* Allow child to continue */
+ if (semop(sem, &sembuf, 1) < 0) {
+ perror("semop:");
+ return 1;
+ }
join_thread(pid);

printf("%s nested sysctl 2 -> 0 after fork\n", memfd_str);
sysctl_assert_write("2");
+
+ if (semctl(sem, 0, SETVAL, semun) < 0) {
+ perror("semctl:");
+ return 1;
+ }
+
pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
test_sysctl_sysctl2);
sysctl_assert_write("0");
- kill(pid, SIGCONT);
+
+ /* Allow child to continue */
+ if (semop(sem, &sembuf, 1) < 0) {
+ perror("semop:");
+ return 1;
+ }
join_thread(pid);

printf("%s nested sysctl 1 -> 0 after fork\n", memfd_str);
sysctl_assert_write("1");
+
+ if (semctl(sem, 0, SETVAL, semun) < 0) {
+ perror("semctl:");
+ return 1;
+ }
+
pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
test_sysctl_sysctl1);
sysctl_assert_write("0");
- kill(pid, SIGCONT);
+ /* Allow child to continue */
+ if (semop(sem, &sembuf, 1) < 0) {
+ perror("semop:");
+ return 1;
+ }
join_thread(pid);

+ semctl(sem, 0, IPC_RMID);
+
return 0;
}

diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c
index 6560c26f47d1..0df61422467d 100644
--- a/tools/testing/selftests/mm/cow.c
+++ b/tools/testing/selftests/mm/cow.c
@@ -1612,8 +1612,8 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
* the first sub-page and test if we get another sub-page populated
* automatically.
*/
- FORCE_READ(mem);
- FORCE_READ(smem);
+ FORCE_READ(*mem);
+ FORCE_READ(*smem);
if (!pagemap_is_populated(pagemap_fd, mem + pagesize) ||
!pagemap_is_populated(pagemap_fd, smem + pagesize)) {
ksft_test_result_skip("Did not get THPs populated\n");
@@ -1663,8 +1663,8 @@ static void run_with_memfd(non_anon_test_fn fn, const char *desc)
}

/* Fault the page in. */
- FORCE_READ(mem);
- FORCE_READ(smem);
+ FORCE_READ(*mem);
+ FORCE_READ(*smem);

fn(mem, smem, pagesize);
munmap:
@@ -1719,8 +1719,8 @@ static void run_with_tmpfile(non_anon_test_fn fn, const char *desc)
}

/* Fault the page in. */
- FORCE_READ(mem);
- FORCE_READ(smem);
+ FORCE_READ(*mem);
+ FORCE_READ(*smem);

fn(mem, smem, pagesize);
munmap:
@@ -1773,8 +1773,8 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
}

/* Fault the page in. */
- FORCE_READ(mem);
- FORCE_READ(smem);
+ FORCE_READ(*mem);
+ FORCE_READ(*smem);

fn(mem, smem, hugetlbsize);
munmap:
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index ea89e558672d..86edbc7e2489 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -223,7 +223,7 @@ mirred_egress_to_ingress_tcp_test()
ip_proto icmp \
action drop

- ip vrf exec v$h1 ncat --recv-only -w10 -l -p 12345 -o $mirred_e2i_tf2 &
+ ip vrf exec v$h1 ncat --recv-only -w10 -l -p 12345 > $mirred_e2i_tf2 &
local rpid=$!
ip vrf exec v$h1 ncat -w1 --send-only 192.0.2.2 12345 <$mirred_e2i_tf1
wait -n $rpid
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
index b43816dd998c..457f41d5e584 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
@@ -567,6 +567,21 @@ vxlan_encapped_ping_do()
local inner_tos=$1; shift
local outer_tos=$1; shift

+ local ipv4hdr=$(:
+ )"45:"$( : IP version + IHL
+ )"$inner_tos:"$( : IP TOS
+ )"00:54:"$( : IP total length
+ )"99:83:"$( : IP identification
+ )"40:00:"$( : IP flags + frag off
+ )"40:"$( : IP TTL
+ )"01:"$( : IP proto
+ )"CHECKSUM:"$( : IP header csum
+ )"c0:00:02:03:"$( : IP saddr: 192.0.2.3
+ )"c0:00:02:01"$( : IP daddr: 192.0.2.1
+ )
+ local checksum=$(payload_template_calc_checksum "$ipv4hdr")
+ ipv4hdr=$(payload_template_expand_checksum "$ipv4hdr" $checksum)
+
$MZ $dev -c $count -d 100msec -q \
-b $next_hop_mac -B $dest_ip \
-t udp tos=$outer_tos,sp=23456,dp=$VXPORT,p=$(:
@@ -577,16 +592,7 @@ vxlan_encapped_ping_do()
)"$dest_mac:"$( : ETH daddr
)"$(mac_get w2):"$( : ETH saddr
)"08:00:"$( : ETH type
- )"45:"$( : IP version + IHL
- )"$inner_tos:"$( : IP TOS
- )"00:54:"$( : IP total length
- )"99:83:"$( : IP identification
- )"40:00:"$( : IP flags + frag off
- )"40:"$( : IP TTL
- )"01:"$( : IP proto
- )"00:00:"$( : IP header csum
- )"c0:00:02:03:"$( : IP saddr: 192.0.2.3
- )"c0:00:02:01:"$( : IP daddr: 192.0.2.1
+ )"$ipv4hdr:"$( : IPv4 header
)"08:"$( : ICMP type
)"00:"$( : ICMP code
)"8b:f2:"$( : ICMP csum
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh
index a603f7b0a08f..e642feeada0e 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh
@@ -695,7 +695,7 @@ vxlan_encapped_ping_do()
)"6"$( : IP version
)"$inner_tos"$( : Traffic class
)"0:00:00:"$( : Flow label
- )"00:08:"$( : Payload length
+ )"00:03:"$( : Payload length
)"3a:"$( : Next header
)"04:"$( : Hop limit
)"$saddr:"$( : IP saddr
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index f448bafb3f20..d0306b27fe95 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -576,7 +576,7 @@ ip_link_has_flag()
local flag=$1; shift

local state=$(ip -j link show "$name" |
- jq --arg flag "$flag" 'any(.[].flags.[]; . == $flag)')
+ jq --arg flag "$flag" 'any(.[].flags[]; . == $flag)')
[[ $state == true ]]
}

diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
index 195f04c4d158..b9c1bfb6cc02 100644
--- a/tools/testing/selftests/resctrl/resctrlfs.c
+++ b/tools/testing/selftests/resctrl/resctrlfs.c
@@ -243,6 +243,16 @@ int snc_nodes_per_l3_cache(void)
}
snc_mode = cache_cpus / node_cpus;

+ /*
+ * On some platforms (e.g. Hygon),
+ * cache_cpus < node_cpus, the calculated snc_mode is 0.
+ *
+ * Set snc_mode = 1 to indicate that SNC mode is not
+ * supported on the platform.
+ */
+ if (!snc_mode)
+ snc_mode = 1;
+
if (snc_mode > 1)
ksft_print_msg("SNC-%d mode discovered.\n", snc_mode);
}