Re: [Intel-wired-lan] [EXTERNAL] [PATCH v5] ice: Add get/set hw address for VFs using devlink commands

From: Wojciech Drewek
Date: Mon Mar 18 2024 - 11:07:57 EST




On 18.03.2024 12:55, Karthik Sundaravel wrote:
> On Fri, Mar 8, 2024 at 3:28 PM Suman Ghosh <sumang@xxxxxxxxxxx> wrote:
>>
>>> ----------------------------------------------------------------------
>>> Changing the MAC address of the VFs are not available via devlink. Add
>>> the function handlers to set and get the HW address for the VFs.
>>>
>>> Signed-off-by: Karthik Sundaravel <ksundara@xxxxxxxxxx>
>>> ---
>>> drivers/net/ethernet/intel/ice/ice_devlink.c | 78 +++++++++++++++++++-
>>> drivers/net/ethernet/intel/ice/ice_sriov.c | 62 ++++++++++++++++
>>> drivers/net/ethernet/intel/ice/ice_sriov.h | 8 ++
>>> 3 files changed, 147 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c
>>> b/drivers/net/ethernet/intel/ice/ice_devlink.c
>>> index 80dc5445b50d..39d4d79ac731 100644
>>> --- a/drivers/net/ethernet/intel/ice/ice_devlink.c
>>> +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c
>>> @@ -1576,6 +1576,81 @@ void ice_devlink_destroy_pf_port(struct ice_pf
>>> *pf)
>>> devlink_port_unregister(&pf->devlink_port);
>>> }
>>>
>>> +/**
>>> + * ice_devlink_port_get_vf_fn_mac - .port_fn_hw_addr_get devlink
>>> +handler
>>> + * @port: devlink port structure
>>> + * @hw_addr: MAC address of the port
>>> + * @hw_addr_len: length of MAC address
>>> + * @extack: extended netdev ack structure
>>> + *
>>> + * Callback for the devlink .port_fn_hw_addr_get operation
>>> + * Return: zero on success or an error code on failure.
>>> + */
>>> +
>>> +static int ice_devlink_port_get_vf_fn_mac(struct devlink_port *port,
>>> + u8 *hw_addr, int *hw_addr_len,
>>> + struct netlink_ext_ack *extack)
>>> +{
>>> + struct devlink_port_attrs *attrs = &port->attrs;
>> [Suman] I agree with Wojciech about using container_of:
>
> [Karthik] when I use container_of(), on some occasions I get core dump
> in get and set functions.
> These issues were not seen in the earlier versions.
> Can you please suggest any pointers on what could have gone wrong ?
>
> struct ice_vf *vf = container_of(port, struct ice_vf, devlink_port);
>
> [ 597.658325] ------------[ cut here ]------------
> [ 597.658329] refcount_t: underflow; use-after-free.
> [ 597.658430] CPU: 18 PID: 1926 Comm: devlink Not tainted 6.8.0-rc5-dirty #1
> [ ...]
> [ 597.658506] ? refcount_warn_saturate+0xbe/0x110
> [ 597.658509] ice_devlink_port_get_vf_fn_mac+0x39/0x70 [ice]
> [ 597.658607] ? __pfx_ice_devlink_port_get_vf_fn_mac+0x10/0x10 [ice]
> [ 597.658676] devlink_nl_port_fill+0x314/0xa30
> [ ...]
> [ 597.658835] ---[ end trace 0000000000000000 ]---
>
>
> [ 859.989482] ------------[ cut here ]------------
> [ 859.989485] refcount_t: saturated; leaking memory.
> [ 859.989500] WARNING: CPU: 0 PID: 1965 at lib/refcount.c:19
> refcount_warn_saturate+0x9b/0x110
> [ ...]
> [ 859.989671] ? refcount_warn_saturate+0x9b/0x110
> [ 859.989674] ice_get_vf_by_id+0x87/0xa0 [ice]
> [ 859.989777] ice_set_vf_fn_mac+0x33/0x150 [ice]
> [ 859.989858] ice_devlink_port_set_vf_fn_mac+0x61/0x90 [ice]
> [ 859.989940] devlink_nl_port_set_doit+0x1d3/0x610
> [ ...]
> [ 952.413933] ---[ end trace 0000000000000000 ]---

Ok, I think we forgot about kref here.
Once you have a VF pointer you have to inc the ref count using
kref_get_unless_zero and you have to check return value because the VF
might be already freed. When you don't need the VF's pointer anymore call ice_put_vf.
Would be cool to have Jake's opinion on that though since he implemented it.

>
>>
>>> + struct devlink_port_pci_vf_attrs *pci_vf;
>>> + struct devlink *devlink = port->devlink;
>>> + struct ice_pf *pf;
>>> + struct ice_vf *vf;
>>> + int vf_id;
>>> +
>>> + pf = devlink_priv(devlink);
>>> + pci_vf = &attrs->pci_vf;
>>> + vf_id = pci_vf->vf;
>>> +
>>> + vf = ice_get_vf_by_id(pf, vf_id);
>>> + if (!vf) {
>>> + NL_SET_ERR_MSG_MOD(extack, "Unable to get the vf");
>>> + return -EINVAL;
>>> + }
>>> + ether_addr_copy(hw_addr, vf->dev_lan_addr);
>>> + *hw_addr_len = ETH_ALEN;
>>> +
>>> + ice_put_vf(vf);
>>> + return 0;
>>> +}
>>> +
>>> +/**
>>> + * ice_devlink_port_set_vf_fn_mac - .port_fn_hw_addr_set devlink
>>> +handler
>>> + * @port: devlink port structure
>>> + * @hw_addr: MAC address of the port
>>> + * @hw_addr_len: length of MAC address
>>> + * @extack: extended netdev ack structure
>>> + *
>>> + * Callback for the devlink .port_fn_hw_addr_set operation
>>> + * Return: zero on success or an error code on failure.
>>> + */
>>> +static int ice_devlink_port_set_vf_fn_mac(struct devlink_port *port,
>>> + const u8 *hw_addr,
>>> + int hw_addr_len,
>>> + struct netlink_ext_ack *extack)
>>> +
>>> +{
>>> + struct devlink_port_attrs *attrs = &port->attrs;
>>> + struct devlink_port_pci_vf_attrs *pci_vf;
>>> + struct devlink *devlink = port->devlink;
>>> + struct ice_pf *pf;
>>> + u8 mac[ETH_ALEN];
>>> + int vf_id;
>>> +
>>> + pf = devlink_priv(devlink);
>>> + pci_vf = &attrs->pci_vf;
>>> + vf_id = pci_vf->vf;
>>> +
>>> + ether_addr_copy(mac, hw_addr);
>>> +
>>> + return ice_set_vf_fn_mac(pf, vf_id, mac); }
>>> +
>>> +static const struct devlink_port_ops ice_devlink_vf_port_ops = {
>>> + .port_fn_hw_addr_get = ice_devlink_port_get_vf_fn_mac,
>>> + .port_fn_hw_addr_set = ice_devlink_port_set_vf_fn_mac, };
>>> +
>>> /**
>>> * ice_devlink_create_vf_port - Create a devlink port for this VF
>>> * @vf: the VF to create a port for
>>> @@ -1611,7 +1686,8 @@ int ice_devlink_create_vf_port(struct ice_vf *vf)
>>> devlink_port_attrs_set(devlink_port, &attrs);
>>> devlink = priv_to_devlink(pf);
>>>
>>> - err = devlink_port_register(devlink, devlink_port, vsi->idx);
>>> + err = devlink_port_register_with_ops(devlink, devlink_port,
>>> + vsi->idx, &ice_devlink_vf_port_ops);
>>> if (err) {
>>> dev_err(dev, "Failed to create devlink port for VF %d, error
>>> %d\n",
>>> vf->vf_id, err);
>>> diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c
>>> b/drivers/net/ethernet/intel/ice/ice_sriov.c
>>> index 31314e7540f8..73cf1d9e9daa 100644
>>> --- a/drivers/net/ethernet/intel/ice/ice_sriov.c
>>> +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c
>>> @@ -1216,6 +1216,68 @@ ice_get_vf_cfg(struct net_device *netdev, int
>>> vf_id, struct ifla_vf_info *ivi)
>>> return ret;
>>> }
>>>
>>> +/**
>>> + * ice_set_vf_fn_mac
>>> + * @pf: PF to be configure
>>> + * @vf_id: VF identifier
>>> + * @mac: MAC address
>>> + *
>>> + * program VF MAC address
>>> + */
>>> +int ice_set_vf_fn_mac(struct ice_pf *pf, int vf_id, u8 *mac) {
>>> + struct device *dev;
>>> + struct ice_vf *vf;
>>> + int ret;
>>> +
>>> + dev = ice_pf_to_dev(pf);
>>> + if (is_multicast_ether_addr(mac)) {
>>> + dev_err(dev, "%pM not a valid unicast address\n", mac);
>>> + return -EINVAL;
>>> + }
>> [Suman] I would suggest put all the validation checks at the beginning of the function.
>>> +
>>> + vf = ice_get_vf_by_id(pf, vf_id);
>> [Suman] Any reason we are passing vf_id instead of the vf itself? If you decide to pass vf itself please move the ether_addr_equal() check at the beginning also.
>>
>>> + if (!vf)
>>> + return -EINVAL;
>>> +
>>> + /* nothing left to do, unicast MAC already set */
>>> + if (ether_addr_equal(vf->dev_lan_addr, mac) &&
>>> + ether_addr_equal(vf->hw_lan_addr, mac)) {
>>> + ret = 0;
>>> + goto out_put_vf;
>>> + }
>>> +
>>> + ret = ice_check_vf_ready_for_cfg(vf);
>>> + if (ret)
>>> + goto out_put_vf;
>>> +
>>> + mutex_lock(&vf->cfg_lock);
>>> +
>>> + /* VF is notified of its new MAC via the PF's response to the
>>> + * VIRTCHNL_OP_GET_VF_RESOURCES message after the VF has been reset
>>> + */
>>> + ether_addr_copy(vf->dev_lan_addr, mac);
>>> + ether_addr_copy(vf->hw_lan_addr, mac);
>>> + if (is_zero_ether_addr(mac)) {
>>> + /* VF will send VIRTCHNL_OP_ADD_ETH_ADDR message with its MAC
>>> */
>>> + vf->pf_set_mac = false;
>>> + dev_info(dev, "Removing MAC on VF %d. VF driver will be
>>> reinitialized\n",
>>> + vf->vf_id);
>>> + } else {
>>> + /* PF will add MAC rule for the VF */
>>> + vf->pf_set_mac = true;
>>> + dev_info(dev, "Setting MAC %pM on VF %d. VF driver will be
>>> reinitialized\n",
>>> + mac, vf_id);
>>> + }
>>> +
>>> + ice_reset_vf(vf, ICE_VF_RESET_NOTIFY);
>>> + mutex_unlock(&vf->cfg_lock);
>>> +
>>> +out_put_vf:
>>> + ice_put_vf(vf);
>>> + return ret;
>>> +}
>>> +
>>> /**
>>> * ice_set_vf_mac
>>> * @netdev: network interface device structure diff --git
>>> a/drivers/net/ethernet/intel/ice/ice_sriov.h
>>> b/drivers/net/ethernet/intel/ice/ice_sriov.h
>>> index 346cb2666f3a..a03be184a806 100644
>>> --- a/drivers/net/ethernet/intel/ice/ice_sriov.h
>>> +++ b/drivers/net/ethernet/intel/ice/ice_sriov.h
>>> @@ -28,6 +28,7 @@
>>> #ifdef CONFIG_PCI_IOV
>>> void ice_process_vflr_event(struct ice_pf *pf); int
>>> ice_sriov_configure(struct pci_dev *pdev, int num_vfs);
>>> +int ice_set_vf_fn_mac(struct ice_pf *pf, int vf_id, u8 *mac);
>>> int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac); int
>>> ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info
>>> *ivi); @@ -76,6 +77,13 @@ ice_sriov_configure(struct pci_dev
>>> __always_unused *pdev,
>>> return -EOPNOTSUPP;
>>> }
>>>
>>> +static inline int
>>> +ice_set_vf_fn_mac(struct ice_pf __always_unused *pf,
>>> + int __always_unused vf_id, u8 __always_unused *mac) {
>>> + return -EOPNOTSUPP;
>>> +}
>>> +
>>> static inline int
>>> ice_set_vf_mac(struct net_device __always_unused *netdev,
>>> int __always_unused vf_id, u8 __always_unused *mac)
>>> --
>>> 2.39.3 (Apple Git-145)
>>>
>>
>