Re: [PATCH net-next v2 11/14] sfc: use PF's IOMMU domain for running VF's MCDI commands

From: Gautam Dawar
Date: Mon Mar 13 2023 - 03:35:51 EST



On 3/10/23 10:35, Jason Wang wrote:
Caution: This message originated from an External Source. Use proper caution when opening attachments, clicking links, or responding.


On Tue, Mar 7, 2023 at 7:38 PM Gautam Dawar <gautam.dawar@xxxxxxx> wrote:
This changeset uses MC_CMD_CLIENT_CMD to execute VF's MCDI
commands when running in vDPA mode (STATE_VDPA).
Also, use the PF's IOMMU domain for executing the encapsulated
VF's MCDI commands to isolate DMA of guest buffers in the VF's
IOMMU domain.
This patch also updates the PCIe FN's client id in the efx_nic
structure which is required while running MC_CMD_CLIENT_CMD.

Signed-off-by: Gautam Dawar <gautam.dawar@xxxxxxx>
---
drivers/net/ethernet/sfc/ef100.c | 1 +
drivers/net/ethernet/sfc/ef100_nic.c | 35 +++++++++
drivers/net/ethernet/sfc/mcdi.c | 108 ++++++++++++++++++++++----
drivers/net/ethernet/sfc/mcdi.h | 2 +-
drivers/net/ethernet/sfc/net_driver.h | 2 +
drivers/net/ethernet/sfc/ptp.c | 4 +-
6 files changed, 132 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/sfc/ef100.c b/drivers/net/ethernet/sfc/ef100.c
index c1c69783db7b..8453c9ba0f41 100644
--- a/drivers/net/ethernet/sfc/ef100.c
+++ b/drivers/net/ethernet/sfc/ef100.c
@@ -465,6 +465,7 @@ static int ef100_pci_probe(struct pci_dev *pci_dev,
efx->type = (const struct efx_nic_type *)entry->driver_data;

efx->pci_dev = pci_dev;
+ efx->client_id = MC_CMD_CLIENT_ID_SELF;
pci_set_drvdata(pci_dev, efx);
rc = efx_init_struct(efx, pci_dev);
if (rc)
diff --git a/drivers/net/ethernet/sfc/ef100_nic.c b/drivers/net/ethernet/sfc/ef100_nic.c
index bda4fcbe1126..cd9f724a9e64 100644
--- a/drivers/net/ethernet/sfc/ef100_nic.c
+++ b/drivers/net/ethernet/sfc/ef100_nic.c
@@ -206,9 +206,11 @@ static int efx_ef100_init_datapath_caps(struct efx_nic *efx)
"firmware reports num_mac_stats = %u\n",
efx->num_mac_stats);

+#ifdef CONFIG_SFC_VDPA
nic_data->vdpa_supported = efx_ef100_has_cap(nic_data->datapath_caps3,
CLIENT_CMD_VF_PROXY) &&
efx->type->is_vf;
+#endif
This should be done at patch 4?
Yeah, right. Will fix.

return 0;
}

@@ -1086,6 +1088,35 @@ static int ef100_check_design_params(struct efx_nic *efx)
return rc;
}

+static int efx_ef100_update_client_id(struct efx_nic *efx)
+{
+ struct ef100_nic_data *nic_data = efx->nic_data;
+ unsigned int pf_index = PCIE_FUNCTION_PF_NULL;
+ unsigned int vf_index = PCIE_FUNCTION_VF_NULL;
+ efx_qword_t pciefn;
+ int rc;
+
+ if (efx->pci_dev->is_virtfn)
+ vf_index = nic_data->vf_index;
+ else
+ pf_index = nic_data->pf_index;
+
+ /* Construct PCIE_FUNCTION structure */
+ EFX_POPULATE_QWORD_3(pciefn,
+ PCIE_FUNCTION_PF, pf_index,
+ PCIE_FUNCTION_VF, vf_index,
+ PCIE_FUNCTION_INTF, PCIE_INTERFACE_CALLER);
+ /* look up self client ID */
+ rc = efx_ef100_lookup_client_id(efx, pciefn, &efx->client_id);
+ if (rc) {
+ pci_warn(efx->pci_dev,
+ "%s: Failed to get client ID, rc %d\n",
+ __func__, rc);
+ }
+
+ return rc;
+}
+
/* NIC probe and remove
*/
static int ef100_probe_main(struct efx_nic *efx)
@@ -1173,6 +1204,10 @@ static int ef100_probe_main(struct efx_nic *efx)
goto fail;
efx->port_num = rc;

+ rc = efx_ef100_update_client_id(efx);
+ if (rc)
+ goto fail;
+
efx_mcdi_print_fwver(efx, fw_version, sizeof(fw_version));
pci_dbg(efx->pci_dev, "Firmware version %s\n", fw_version);

diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c
index a7f2c31071e8..3bf1ebe05775 100644
--- a/drivers/net/ethernet/sfc/mcdi.c
+++ b/drivers/net/ethernet/sfc/mcdi.c
@@ -145,14 +145,15 @@ void efx_mcdi_fini(struct efx_nic *efx)
kfree(efx->mcdi);
}

-static void efx_mcdi_send_request(struct efx_nic *efx, unsigned cmd,
- const efx_dword_t *inbuf, size_t inlen)
+static void efx_mcdi_send_request(struct efx_nic *efx, u32 client_id,
+ unsigned int cmd, const efx_dword_t *inbuf,
+ size_t inlen)
{
struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
#ifdef CONFIG_SFC_MCDI_LOGGING
char *buf = mcdi->logging_buffer; /* page-sized */
#endif
- efx_dword_t hdr[2];
+ efx_dword_t hdr[5];
size_t hdr_len;
u32 xflags, seqno;

@@ -179,7 +180,7 @@ static void efx_mcdi_send_request(struct efx_nic *efx, unsigned cmd,
MCDI_HEADER_XFLAGS, xflags,
MCDI_HEADER_NOT_EPOCH, !mcdi->new_epoch);
hdr_len = 4;
- } else {
+ } else if (client_id == efx->client_id) {
/* MCDI v2 */
BUG_ON(inlen > MCDI_CTL_SDU_LEN_MAX_V2);
EFX_POPULATE_DWORD_7(hdr[0],
@@ -194,6 +195,35 @@ static void efx_mcdi_send_request(struct efx_nic *efx, unsigned cmd,
MC_CMD_V2_EXTN_IN_EXTENDED_CMD, cmd,
MC_CMD_V2_EXTN_IN_ACTUAL_LEN, inlen);
hdr_len = 8;
+ } else {
+ /* MCDI v2 */
Just wonder if V2 is a must for vDPA? If yes we probably need to fail
vDPA creation without it.

Agreed, will add this condition in evaluation of nic_data->vdpa_supported.

Thanks


Thanks


+ WARN_ON(inlen > MCDI_CTL_SDU_LEN_MAX_V2);
+ /* MCDI v2 with credentials of a different client */
+ BUILD_BUG_ON(MC_CMD_CLIENT_CMD_IN_LEN != 4);
+ /* Outer CLIENT_CMD wrapper command with client ID */
+ EFX_POPULATE_DWORD_7(hdr[0],
+ MCDI_HEADER_RESPONSE, 0,
+ MCDI_HEADER_RESYNC, 1,
+ MCDI_HEADER_CODE, MC_CMD_V2_EXTN,
+ MCDI_HEADER_DATALEN, 0,
+ MCDI_HEADER_SEQ, seqno,
+ MCDI_HEADER_XFLAGS, xflags,
+ MCDI_HEADER_NOT_EPOCH, !mcdi->new_epoch);
+ EFX_POPULATE_DWORD_2(hdr[1],
+ MC_CMD_V2_EXTN_IN_EXTENDED_CMD,
+ MC_CMD_CLIENT_CMD,
+ MC_CMD_V2_EXTN_IN_ACTUAL_LEN, inlen + 12);
+ MCDI_SET_DWORD(&hdr[2],
+ CLIENT_CMD_IN_CLIENT_ID, client_id);
+
+ /* MCDIv2 header for inner command */
+ EFX_POPULATE_DWORD_2(hdr[3],
+ MCDI_HEADER_CODE, MC_CMD_V2_EXTN,
+ MCDI_HEADER_DATALEN, 0);
+ EFX_POPULATE_DWORD_2(hdr[4],
+ MC_CMD_V2_EXTN_IN_EXTENDED_CMD, cmd,
+ MC_CMD_V2_EXTN_IN_ACTUAL_LEN, inlen);
+ hdr_len = 20;
}

#ifdef CONFIG_SFC_MCDI_LOGGING
@@ -474,7 +504,8 @@ static void efx_mcdi_release(struct efx_mcdi_iface *mcdi)
&mcdi->async_list, struct efx_mcdi_async_param, list);
if (async) {
mcdi->state = MCDI_STATE_RUNNING_ASYNC;
- efx_mcdi_send_request(efx, async->cmd,
+ efx_mcdi_send_request(efx, efx->client_id,
+ async->cmd,
(const efx_dword_t *)(async + 1),
async->inlen);
mod_timer(&mcdi->async_timer,
@@ -797,7 +828,7 @@ static int efx_mcdi_proxy_wait(struct efx_nic *efx, u32 handle, bool quiet)
return mcdi->proxy_rx_status;
}

-static int _efx_mcdi_rpc(struct efx_nic *efx, unsigned int cmd,
+static int _efx_mcdi_rpc(struct efx_nic *efx, u32 client_id, unsigned int cmd,
const efx_dword_t *inbuf, size_t inlen,
efx_dword_t *outbuf, size_t outlen,
size_t *outlen_actual, bool quiet, int *raw_rc)
@@ -811,7 +842,7 @@ static int _efx_mcdi_rpc(struct efx_nic *efx, unsigned int cmd,
return -EINVAL;
}

- rc = efx_mcdi_rpc_start(efx, cmd, inbuf, inlen);
+ rc = efx_mcdi_rpc_start(efx, client_id, cmd, inbuf, inlen);
if (rc)
return rc;

@@ -836,7 +867,8 @@ static int _efx_mcdi_rpc(struct efx_nic *efx, unsigned int cmd,

/* We now retry the original request. */
mcdi->state = MCDI_STATE_RUNNING_SYNC;
- efx_mcdi_send_request(efx, cmd, inbuf, inlen);
+ efx_mcdi_send_request(efx, efx->client_id, cmd,
+ inbuf, inlen);

rc = _efx_mcdi_rpc_finish(efx, cmd, inlen,
outbuf, outlen, outlen_actual,
@@ -855,16 +887,44 @@ static int _efx_mcdi_rpc(struct efx_nic *efx, unsigned int cmd,
return rc;
}

+#ifdef CONFIG_SFC_VDPA
+static bool is_mode_vdpa(struct efx_nic *efx)
+{
+ if (efx->pci_dev->is_virtfn &&
+ efx->pci_dev->physfn &&
+ efx->state == STATE_VDPA &&
+ efx->vdpa_nic)
+ return true;
+
+ return false;
+}
+#endif
+
static int _efx_mcdi_rpc_evb_retry(struct efx_nic *efx, unsigned cmd,
const efx_dword_t *inbuf, size_t inlen,
efx_dword_t *outbuf, size_t outlen,
size_t *outlen_actual, bool quiet)
{
+#ifdef CONFIG_SFC_VDPA
+ struct efx_nic *efx_pf;
+#endif
int raw_rc = 0;
int rc;

- rc = _efx_mcdi_rpc(efx, cmd, inbuf, inlen,
- outbuf, outlen, outlen_actual, true, &raw_rc);
+#ifdef CONFIG_SFC_VDPA
+ if (is_mode_vdpa(efx)) {
+ efx_pf = pci_get_drvdata(efx->pci_dev->physfn);
+ rc = _efx_mcdi_rpc(efx_pf, efx->client_id, cmd, inbuf,
+ inlen, outbuf, outlen, outlen_actual,
+ true, &raw_rc);
+ } else {
+#endif
+ rc = _efx_mcdi_rpc(efx, efx->client_id, cmd, inbuf,
+ inlen, outbuf, outlen, outlen_actual, true,
+ &raw_rc);
+#ifdef CONFIG_SFC_VDPA
+ }
+#endif

if ((rc == -EPROTO) && (raw_rc == MC_CMD_ERR_NO_EVB_PORT) &&
efx->type->is_vf) {
@@ -881,9 +941,22 @@ static int _efx_mcdi_rpc_evb_retry(struct efx_nic *efx, unsigned cmd,

do {
usleep_range(delay_us, delay_us + 10000);
- rc = _efx_mcdi_rpc(efx, cmd, inbuf, inlen,
- outbuf, outlen, outlen_actual,
- true, &raw_rc);
+#ifdef CONFIG_SFC_VDPA
+ if (is_mode_vdpa(efx)) {
+ efx_pf = pci_get_drvdata(efx->pci_dev->physfn);
+ rc = _efx_mcdi_rpc(efx_pf, efx->client_id, cmd,
+ inbuf, inlen, outbuf, outlen,
+ outlen_actual, true,
+ &raw_rc);
+ } else {
+#endif
+ rc = _efx_mcdi_rpc(efx, efx->client_id,
+ cmd, inbuf, inlen, outbuf,
+ outlen, outlen_actual, true,
+ &raw_rc);
+#ifdef CONFIG_SFC_VDPA
+ }
+#endif
if (delay_us < 100000)
delay_us <<= 1;
} while ((rc == -EPROTO) &&
@@ -939,7 +1012,7 @@ int efx_mcdi_rpc(struct efx_nic *efx, unsigned cmd,
* function and is then responsible for calling efx_mcdi_display_error
* as needed.
*/
-int efx_mcdi_rpc_quiet(struct efx_nic *efx, unsigned cmd,
+int efx_mcdi_rpc_quiet(struct efx_nic *efx, unsigned int cmd,
const efx_dword_t *inbuf, size_t inlen,
efx_dword_t *outbuf, size_t outlen,
size_t *outlen_actual)
@@ -948,7 +1021,7 @@ int efx_mcdi_rpc_quiet(struct efx_nic *efx, unsigned cmd,
outlen_actual, true);
}

-int efx_mcdi_rpc_start(struct efx_nic *efx, unsigned cmd,
+int efx_mcdi_rpc_start(struct efx_nic *efx, u32 client_id, unsigned int cmd,
const efx_dword_t *inbuf, size_t inlen)
{
struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
@@ -965,7 +1038,7 @@ int efx_mcdi_rpc_start(struct efx_nic *efx, unsigned cmd,
return -ENETDOWN;

efx_mcdi_acquire_sync(mcdi);
- efx_mcdi_send_request(efx, cmd, inbuf, inlen);
+ efx_mcdi_send_request(efx, client_id, cmd, inbuf, inlen);
return 0;
}

@@ -1009,7 +1082,8 @@ static int _efx_mcdi_rpc_async(struct efx_nic *efx, unsigned int cmd,
*/
if (mcdi->async_list.next == &async->list &&
efx_mcdi_acquire_async(mcdi)) {
- efx_mcdi_send_request(efx, cmd, inbuf, inlen);
+ efx_mcdi_send_request(efx, efx->client_id,
+ cmd, inbuf, inlen);
mod_timer(&mcdi->async_timer,
jiffies + MCDI_RPC_TIMEOUT);
}
diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h
index dafab52aaef7..2c526d2edeb6 100644
--- a/drivers/net/ethernet/sfc/mcdi.h
+++ b/drivers/net/ethernet/sfc/mcdi.h
@@ -150,7 +150,7 @@ int efx_mcdi_rpc_quiet(struct efx_nic *efx, unsigned cmd,
efx_dword_t *outbuf, size_t outlen,
size_t *outlen_actual);

-int efx_mcdi_rpc_start(struct efx_nic *efx, unsigned cmd,
+int efx_mcdi_rpc_start(struct efx_nic *efx, u32 client_id, unsigned int cmd,
const efx_dword_t *inbuf, size_t inlen);
int efx_mcdi_rpc_finish(struct efx_nic *efx, unsigned cmd, size_t inlen,
efx_dword_t *outbuf, size_t outlen,
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 1da71deac71c..948c7a06403a 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -859,6 +859,7 @@ struct efx_mae;
* @secondary_list: List of &struct efx_nic instances for the secondary PCI
* functions of the controller, if this is for the primary function.
* Serialised by rtnl_lock.
+ * @client_id: client ID of this PCIe function
* @type: Controller type attributes
* @legacy_irq: IRQ number
* @workqueue: Workqueue for port reconfigures and the HW monitor.
@@ -1022,6 +1023,7 @@ struct efx_nic {
struct list_head secondary_list;
struct pci_dev *pci_dev;
unsigned int port_num;
+ u32 client_id;
const struct efx_nic_type *type;
int legacy_irq;
bool eeh_disabled_legacy_irq;
diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
index 9f07e1ba7780..d90d4f6b3824 100644
--- a/drivers/net/ethernet/sfc/ptp.c
+++ b/drivers/net/ethernet/sfc/ptp.c
@@ -1052,8 +1052,8 @@ static int efx_ptp_synchronize(struct efx_nic *efx, unsigned int num_readings)

/* Clear flag that signals MC ready */
WRITE_ONCE(*start, 0);
- rc = efx_mcdi_rpc_start(efx, MC_CMD_PTP, synch_buf,
- MC_CMD_PTP_IN_SYNCHRONIZE_LEN);
+ rc = efx_mcdi_rpc_start(efx, MC_CMD_CLIENT_ID_SELF, MC_CMD_PTP,
+ synch_buf, MC_CMD_PTP_IN_SYNCHRONIZE_LEN);
EFX_WARN_ON_ONCE_PARANOID(rc);

/* Wait for start from MCDI (or timeout) */
--
2.30.1