Re: kernel 4.18.5 Realtek 8111G network adapter stops responding under high system load

From: Heiner Kallweit
Date: Tue Sep 25 2018 - 17:03:51 EST


On 19.09.2018 06:12, David Arendt wrote:
> Hi,
>
> Thanks for the patch.
>
> I just applied it and the TxConfig register now contains 0x4f000f80.
> The next day will show if it really solves the problem.
>
> Thanks in advance,
> David Arendt
>
> On 9/19/18 12:30 AM, Maciej S. Szmigiero wrote:
>> Hi,
>>
>> On 18.09.2018 12:23, David Arendt wrote:
>>> Hi,
>>>
>>> Today I had the network adapter problems again.
>>> So the patch doesn't seem to change anything regarding this problem.
>>> This week my time is unfortunately very limited, but I will try to
>>> find some time next weekend to look a bit more into the issue.
>> If the problem is caused by missing TXCFG_AUTO_FIFO bit in TxConfig,
>> as the register difference would suggest, then you can try applying
>> the following patch (hack) on top of 4.18.8 that is already patched
>> with commit f74dd480cf4e:
>> --- a/drivers/net/ethernet/realtek/r8169.c
>> +++ b/drivers/net/ethernet/realtek/r8169.c
>> @@ -5043,7 +5043,8 @@
>> {
>> /* Set DMA burst size and Interframe Gap Time */
>> RTL_W32(tp, TxConfig, (TX_DMA_BURST << TxDMAShift) |
>> - (InterFrameGap << TxInterFrameGapShift));
>> + (InterFrameGap << TxInterFrameGapShift)
>> + | TXCFG_AUTO_FIFO);
>> }
>>
>> static void rtl_set_rx_max_size(struct rtl8169_private *tp)
>>
>> This hack will probably only work properly on RTL_GIGA_MAC_VER_40 or
>> later NICs.
>>
>> Before running any tests please verify with "ethtool -d enp3s0" that
>> TxConfig register now contains 0x4f000f80, as it did in the old,
>> working driver version.
>>
>> If this does not help then a bisection will most likely be needed.
>>
>>> Thanks in advance,
>>> David Arendt
>> Maciej
>
>
>
@Gabriel:
Thanks for the hint, I wasn't fully aware of this thread.
@Maciej:
Thanks for the analysis.

It seems that all chip versions from 34 (= RTL8168E-VL) onwards, with
the exception of version 39 (= RTL8106E, first sub-version), need the
TXCFG_AUTO_FIFO bit to be set in TxConfig.

And indeed, due to a reordering of calls, this bit is now overwritten
after the chip-specific hw_start function has set it. The following
patch moves setting the bit from the chip-specific hw_start functions
to rtl_set_tx_config_registers().

If you are hit by this issue and are able to build a kernel,
could you please test whether the patch fixes it for you?

Thanks, Heiner

---
drivers/net/ethernet/realtek/r8169.c | 20 ++++++++------------
1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index f882be49f..ae8abe900 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -4514,9 +4514,14 @@ static void rtl8169_hw_reset(struct rtl8169_private *tp)

static void rtl_set_tx_config_registers(struct rtl8169_private *tp)
{
- /* Set DMA burst size and Interframe Gap Time */
- RTL_W32(tp, TxConfig, (TX_DMA_BURST << TxDMAShift) |
- (InterFrameGap << TxInterFrameGapShift));
+ u32 val = TX_DMA_BURST << TxDMAShift |
+ InterFrameGap << TxInterFrameGapShift;
+
+ if (tp->mac_version >= RTL_GIGA_MAC_VER_34 &&
+ tp->mac_version != RTL_GIGA_MAC_VER_39)
+ val |= TXCFG_AUTO_FIFO;
+
+ RTL_W32(tp, TxConfig, val);
}

static void rtl_set_rx_max_size(struct rtl8169_private *tp)
@@ -5011,7 +5016,6 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)

rtl_disable_clock_request(tp);

- RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);

/* Adjust EEE LED frequency */
@@ -5045,7 +5049,6 @@ static void rtl_hw_start_8168f(struct rtl8169_private *tp)

rtl_disable_clock_request(tp);

- RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN);
RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN);
@@ -5090,8 +5093,6 @@ static void rtl_hw_start_8411(struct rtl8169_private *tp)

static void rtl_hw_start_8168g(struct rtl8169_private *tp)
{
- RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
-
rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x080002, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x38, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xd0, ERIAR_MASK_0001, 0x48, ERIAR_EXGMAC);
@@ -5189,8 +5190,6 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
rtl_hw_aspm_clkreq_enable(tp, false);
rtl_ephy_init(tp, e_info_8168h_1, ARRAY_SIZE(e_info_8168h_1));

- RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
-
rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x00080002, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x38, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xd0, ERIAR_MASK_0001, 0x48, ERIAR_EXGMAC);
@@ -5273,8 +5272,6 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
{
rtl8168ep_stop_cmac(tp);

- RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
-
rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x00080002, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x2f, ERIAR_EXGMAC);
rtl_eri_write(tp, 0xd0, ERIAR_MASK_0001, 0x5f, ERIAR_EXGMAC);
@@ -5596,7 +5593,6 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp)
/* Force LAN exit from ASPM if Rx/Tx are not idle */
RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) | 0x002800);

- RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);

rtl_ephy_init(tp, e_info_8402, ARRAY_SIZE(e_info_8402));
--
2.19.0