RE: [PATCH v2] hv_netvsc: Allocate rx indirection table size dynamically

From: Haiyang Zhang
Date: Wed May 24 2023 - 12:21:45 EST




> -----Original Message-----
> From: Shradha Gupta <shradhagupta@xxxxxxxxxxxxxxxxxxx>
> Sent: Wednesday, May 24, 2023 5:57 AM
> To: linux-kernel@xxxxxxxxxxxxxxx; linux-hyperv@xxxxxxxxxxxxxxx;
> netdev@xxxxxxxxxxxxxxx
> Cc: Shradha Gupta <shradhagupta@xxxxxxxxxxxxxxxxxxx>; Eric Dumazet
> <edumazet@xxxxxxxxxx>; Jakub Kicinski <kuba@xxxxxxxxxx>; Paolo Abeni
> <pabeni@xxxxxxxxxx>; KY Srinivasan <kys@xxxxxxxxxxxxx>; Haiyang Zhang
> <haiyangz@xxxxxxxxxxxxx>; Wei Liu <wei.liu@xxxxxxxxxx>; Dexuan Cui
> <decui@xxxxxxxxxxxxx>; Long Li <longli@xxxxxxxxxxxxx>; Michael Kelley
> (LINUX) <mikelley@xxxxxxxxxxxxx>; David S. Miller <davem@xxxxxxxxxxxxx>;
> Steen Hegelund <steen.hegelund@xxxxxxxxxxxxx>; Simon Horman
> <simon.horman@xxxxxxxxxxxx>
> Subject: [PATCH v2] hv_netvsc: Allocate rx indirection table size dynamically
>
> Allocate the size of rx indirection table dynamically in netvsc
> from the value of size provided by OID_GEN_RECEIVE_SCALE_CAPABILITIES
> query instead of using a constant value of ITAB_NUM.
>
> Signed-off-by: Shradha Gupta <shradhagupta@xxxxxxxxxxxxxxxxxxx>
> ---
> Changes in v2:
> * Added a missing free for rx_table to fix a leak
> * Corrected alignment around rx table size query
> * Fixed incorrect error handling for rx_table pointer.
> ---
> drivers/net/hyperv/hyperv_net.h | 5 ++++-
> drivers/net/hyperv/netvsc_drv.c | 18 ++++++++++++++----
> drivers/net/hyperv/rndis_filter.c | 22 ++++++++++++++++++----
> 3 files changed, 36 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/net/hyperv/hyperv_net.h
> b/drivers/net/hyperv/hyperv_net.h
> index dd5919ec408b..1dbdb65ca8f0 100644
> --- a/drivers/net/hyperv/hyperv_net.h
> +++ b/drivers/net/hyperv/hyperv_net.h
> @@ -74,6 +74,7 @@ struct ndis_recv_scale_cap { /*
> NDIS_RECEIVE_SCALE_CAPABILITIES */
> #define NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 40
>
> #define ITAB_NUM 128
> +#define ITAB_NUM_MAX 256
>
> struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */
> struct ndis_obj_header hdr;
> @@ -1034,7 +1035,9 @@ struct net_device_context {
>
> u32 tx_table[VRSS_SEND_TAB_SIZE];
>
> - u16 rx_table[ITAB_NUM];
> + u16 *rx_table;
> +
> + int rx_table_sz;
>
> /* Ethtool settings */
> u8 duplex;
> diff --git a/drivers/net/hyperv/netvsc_drv.c
> b/drivers/net/hyperv/netvsc_drv.c
> index 0103ff914024..ab791e4ca63c 100644
> --- a/drivers/net/hyperv/netvsc_drv.c
> +++ b/drivers/net/hyperv/netvsc_drv.c
> @@ -1040,6 +1040,13 @@ static int netvsc_detach(struct net_device *ndev,
>
> rndis_filter_device_remove(hdev, nvdev);
>
> + /*
> + * Free the rx indirection table and reset the table size to 0.
> + * With the netvsc_attach call it will get allocated again.
> + */
> + ndev_ctx->rx_table_sz = 0;
> + kfree(ndev_ctx->rx_table);
> +

Please move the table free to rndis_filter_device_remove() which is the counter
part of rndis_filter_device_add(). So it's automatically called by netvsc_detach/remove()
and other error cases.

Also, set ndev_ctx->rx_table = NULL after free to prevent potential double free, or
accessing freed memory, etc.

> return 0;
> }
>
> @@ -1747,7 +1754,9 @@ static u32 netvsc_get_rxfh_key_size(struct
> net_device *dev)
>
> static u32 netvsc_rss_indir_size(struct net_device *dev)
> {
> - return ITAB_NUM;
> + struct net_device_context *ndc = netdev_priv(dev);
> +
> + return ndc->rx_table_sz;
> }
>
> static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
> @@ -1766,7 +1775,7 @@ static int netvsc_get_rxfh(struct net_device *dev,
> u32 *indir, u8 *key,
>
> rndis_dev = ndev->extension;
> if (indir) {
> - for (i = 0; i < ITAB_NUM; i++)
> + for (i = 0; i < ndc->rx_table_sz; i++)
> indir[i] = ndc->rx_table[i];
> }
>
> @@ -1792,11 +1801,11 @@ static int netvsc_set_rxfh(struct net_device
> *dev, const u32 *indir,
>
> rndis_dev = ndev->extension;
> if (indir) {
> - for (i = 0; i < ITAB_NUM; i++)
> + for (i = 0; i < ndc->rx_table_sz; i++)
> if (indir[i] >= ndev->num_chn)
> return -EINVAL;
>
> - for (i = 0; i < ITAB_NUM; i++)
> + for (i = 0; i < ndc->rx_table_sz; i++)
> ndc->rx_table[i] = indir[i];
> }

Please use the ethtool to change & show the table contents. So these functions
are tested:
ethtool -x eth0
ethtool -X eth0 ...

Also, run perf test to ensure no perf regression.

>
> @@ -2638,6 +2647,7 @@ static void netvsc_remove(struct hv_device *dev)
>
> hv_set_drvdata(dev, NULL);
>
> + kfree(ndev_ctx->rx_table);

Move the table free to rndis_filter_device_remove() as said earlier.

> free_percpu(ndev_ctx->vf_stats);
> free_netdev(net);
> }
> diff --git a/drivers/net/hyperv/rndis_filter.c
> b/drivers/net/hyperv/rndis_filter.c
> index eea777ec2541..3695c7d3da3a 100644
> --- a/drivers/net/hyperv/rndis_filter.c
> +++ b/drivers/net/hyperv/rndis_filter.c
> @@ -927,7 +927,7 @@ static int rndis_set_rss_param_msg(struct
> rndis_device *rdev,
> struct rndis_set_request *set;
> struct rndis_set_complete *set_complete;
> u32 extlen = sizeof(struct ndis_recv_scale_param) +
> - 4 * ITAB_NUM + NETVSC_HASH_KEYLEN;
> + 4 * ndc->rx_table_sz + NETVSC_HASH_KEYLEN;
> struct ndis_recv_scale_param *rssp;
> u32 *itab;
> u8 *keyp;
> @@ -953,7 +953,7 @@ static int rndis_set_rss_param_msg(struct
> rndis_device *rdev,
> rssp->hashinfo = NDIS_HASH_FUNC_TOEPLITZ | NDIS_HASH_IPV4 |
> NDIS_HASH_TCP_IPV4 | NDIS_HASH_IPV6 |
> NDIS_HASH_TCP_IPV6;
> - rssp->indirect_tabsize = 4*ITAB_NUM;
> + rssp->indirect_tabsize = 4 * ndc->rx_table_sz;
> rssp->indirect_taboffset = sizeof(struct ndis_recv_scale_param);
> rssp->hashkey_size = NETVSC_HASH_KEYLEN;
> rssp->hashkey_offset = rssp->indirect_taboffset +
> @@ -961,7 +961,7 @@ static int rndis_set_rss_param_msg(struct
> rndis_device *rdev,
>
> /* Set indirection table entries */
> itab = (u32 *)(rssp + 1);
> - for (i = 0; i < ITAB_NUM; i++)
> + for (i = 0; i < ndc->rx_table_sz; i++)
> itab[i] = ndc->rx_table[i];
>
> /* Set hask key values */
> @@ -1548,6 +1548,20 @@ struct netvsc_device
> *rndis_filter_device_add(struct hv_device *dev,
> if (ret || rsscap.num_recv_que < 2)
> goto out;
>
> + if (rsscap.num_indirect_tabent &&
> + rsscap.num_indirect_tabent <= ITAB_NUM_MAX)
> + ndc->rx_table_sz = rsscap.num_indirect_tabent;
> + else
> + ndc->rx_table_sz = ITAB_NUM;
> +
> + ndc->rx_table = kzalloc(sizeof(u16) * ndc->rx_table_sz,
> + GFP_KERNEL);
> + if (!ndc->rx_table) {
> + netdev_err(net, "Error in allocating rx indirection table of
> size %d\n",
> + ndc->rx_table_sz);
> + goto out;
> + }

When no enough memory for the table, it should:
goto err_dev_remv;
So the device_add fails with an error.

Otherwise, it may continue to run with just one channel. The perf will be low, but
the error is not easily visible. That's probably why you didn't find the "if (ndc->rx_table) "
condition error in the patch v1.

Thanks,
- Haiyang