[PATCH iwl-net 4/4] igb: fix igb_msix_other() handling for PREEMPT_RT
From: Wander Lairson Costa
Date: Wed Dec 04 2024 - 07:06:48 EST
During testing of SR-IOV, Red Hat QE encountered an issue where the
ip link up command intermittently fails for the igbvf interfaces when
using the PREEMPT_RT variant. Investigation revealed that
e1000_write_posted_mbx returns an error due to the lack of an ACK
from e1000_poll_for_ack.
The underlying issue arises from the fact that IRQs are threaded by
default under PREEMPT_RT. While the exact hardware details are not
available, it appears that the IRQ handled by igb_msix_other must
be processed before e1000_poll_for_ack times out. However,
e1000_write_posted_mbx is called with preemption disabled, leading
to a scenario where the IRQ is serviced only after the failure of
e1000_write_posted_mbx.
Commit 338c4d3902fe ("igb: Disable threaded IRQ for igb_msix_other")
forced the ISR to run in a non-threaded context. However, Sebastian
observed that some functions called within the ISR acquire locks that
may sleep.
In the previous two patches, we managed to make igb_msg_task() safe to
call from an interrupt context.
In this commit, we move most of the ISR handling to an interrupt
context, leaving the non-IRQ-safe code to be called from the thread
context under PREEMPT_RT.
Reproducer:
ipaddr_vlan=3
nic_test=ens14f0
vf=${nic_test}v0
# The main testing steps:
while true; do
ip link set ${nic_test} mtu 1500
ip link set ${vf} mtu 1500
ip link set $vf up
# 3. set vlan and ip for VF
ip link set ${nic_test} vf 0 vlan ${ipaddr_vlan}
ip addr add 172.30.${ipaddr_vlan}.1/24 dev ${vf}
ip addr add 2021:db8:${ipaddr_vlan}::1/64 dev ${vf}
# 4. check the link state for VF and PF
ip link show ${nic_test}
if ! ip link show $vf | grep 'state UP'; then
echo 'Error found'
break
fi
ip link set $vf down
done
You can also reproduce it more reliably by setting nr_cpus=1 in the
kernel command line.
Fixes: 9d5c824399de ("igb: PCI-Express 82575 Gigabit Ethernet driver")
Signed-off-by: Wander Lairson Costa <wander@xxxxxxxxxx>
Reported-by: Yuying Ma <yuma@xxxxxxxxxx>
---
drivers/net/ethernet/intel/igb/igb_main.c | 35 ++++++++++++++++-------
1 file changed, 24 insertions(+), 11 deletions(-)
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 5828831fd29c2..b2894cebe2c9e 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -131,6 +131,7 @@ static void igb_set_uta(struct igb_adapter *adapter, bool set);
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
+static irqreturn_t igb_msix_other_threaded(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *);
@@ -151,7 +152,6 @@ static void igb_rar_set_index(struct igb_adapter *, u32);
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task_irq_safe(struct igb_adapter *adapter);
static void igb_msg_task_preemptible_safe(struct igb_adapter *adapter);
-static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_flush_mac_table(struct igb_adapter *);
@@ -908,8 +908,9 @@ static int igb_request_msix(struct igb_adapter *adapter)
struct net_device *netdev = adapter->netdev;
int i, err = 0, vector = 0, free_vector = 0;
- err = request_irq(adapter->msix_entries[vector].vector,
- igb_msix_other, 0, netdev->name, adapter);
+ err = request_threaded_irq(adapter->msix_entries[vector].vector,
+ igb_msix_other, igb_msix_other_threaded,
+ IRQF_NO_THREAD, netdev->name, adapter);
if (err)
goto err_out;
@@ -7113,9 +7114,27 @@ static irqreturn_t igb_msix_other(int irq, void *data)
igb_check_wvbr(adapter);
}
- /* Check for a mailbox event */
+ /* Check for a mailbox event (interrupt safe part) */
if (icr & E1000_ICR_VMMB)
- igb_msg_task(adapter);
+ igb_msg_task_irq_safe(adapter);
+
+ adapter->test_icr = icr;
+
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ return igb_msix_other_threaded(irq, data);
+
+ return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t igb_msix_other_threaded(int irq, void *data)
+{
+ struct igb_adapter *adapter = data;
+ struct e1000_hw *hw = &adapter->hw;
+ u32 icr = adapter->test_icr;
+
+ /* Check for a mailbox event (preemptible safe part) */
+ if (icr & E1000_ICR_VMMB)
+ igb_msg_task_preemptible_safe(adapter);
if (icr & E1000_ICR_LSC) {
hw->mac.get_link_status = 1;
@@ -8161,12 +8180,6 @@ static void igb_msg_task_preemptible_safe(struct igb_adapter *adapter)
vfs_spin_unlock_irqrestore(adapter, flags);
}
-static __always_inline void igb_msg_task(struct igb_adapter *adapter)
-{
- igb_msg_task_irq_safe(adapter);
- igb_msg_task_preemptible_safe(adapter);
-}
-
/**
* igb_set_uta - Set unicast filter table address
* @adapter: board private structure
--
2.47.0