Re: [RFC PATCH net-next v5 05/11] net/smc: implement DMB-related operations of loopback-ism

From: Wen Gu
Date: Thu Apr 04 2024 - 06:20:31 EST




On 2024/4/4 01:20, Gerd Bayer wrote:
On Sun, 2024-03-24 at 21:55 +0800, Wen Gu wrote:

When I instrumented this to see why I still see tons of my other
temporary instrumentation messages from the "ism" driver, I found that
in my setup loopback-ism is used rather infrequently.

I suspect this is due to how the SMC proposals are constructed in
net/smc/af_smc.c and net/smc/smc_pnet.c - and later evaluated in
smc_check_ism_v2_match() - where there is a first-come-first-serve
selection.

I wonder if one should change that to favour loopback-ism over "real"
ISM devices - and how this could be achieved elegantly.

Just some food for thought... Probably little you can do on x86.


Yes, this comes down to the priority of the available ISM devices,
which is currently decided by their order in smcd_dev_list. ISMv2
devices registered later (without a pnetid) are added to the beginning
of the list (see smcd_register_dev()), so there is a good chance that
loopback-ism will not be ranked first, since it is added to the list
earlier, during smc_init().
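
For reference, the ordering comes from the list insertion in
smcd_register_dev() (net/smc/smc_ism.c). A simplified sketch of that
logic, from memory and with error handling elided:

	/* sketch: devices without a pnetid go to the head of the list,
	 * devices with a pnetid to the tail
	 */
	mutex_lock(&smcd_dev_list.mutex);
	if (smcd->pnetid[0])
		list_add_tail(&smcd->list, &smcd_dev_list.list);
	else
		list_add(&smcd->list, &smcd_dev_list.list);
	mutex_unlock(&smcd_dev_list.mutex);

Since loopback-ism also has no pnetid, any ISMv2 device registered
after it ends up ahead of it in the list.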

Once we have a runtime switch for loopback-ism, re-activating it would
re-add it to the beginning of the device list, so it would be chosen
first. Alternatively, a new netlink command could adjust the order of
the available ISM devices in the list. As we discussed before, these
could be tasks for stage 1 or stage 2.
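
To illustrate the first idea (a rough, hypothetical sketch only, not
part of this series; smc_lo_reactivate() is a made-up name):

	/* hypothetical: dropping and re-registering the loopback-ism
	 * device would move it back to the head of smcd_dev_list,
	 * because it has no pnetid
	 */
	static int smc_lo_reactivate(struct smc_lo_dev *ldev)
	{
		smcd_unregister_dev(ldev->smcd);
		return smcd_register_dev(ldev->smcd);
	}

The real switch would of course need to tear down and re-create the
smcd device properly, not just re-link it in the list.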

Thanks!

Thanks,
Gerd

+static int smc_lo_register_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb,
+			       void *client_priv)
+{
+	struct smc_lo_dmb_node *dmb_node, *tmp_node;
+	struct smc_lo_dev *ldev = smcd->priv;
+	int sba_idx, rc;
+
+	/* check space for new dmb */
+	for_each_clear_bit(sba_idx, ldev->sba_idx_mask, SMC_LO_MAX_DMBS) {
+		if (!test_and_set_bit(sba_idx, ldev->sba_idx_mask))
+			break;
+	}
+	if (sba_idx == SMC_LO_MAX_DMBS)
+		return -ENOSPC;
+
+	dmb_node = kzalloc(sizeof(*dmb_node), GFP_KERNEL);
+	if (!dmb_node) {
+		rc = -ENOMEM;
+		goto err_bit;
+	}
+
+	dmb_node->sba_idx = sba_idx;
+	dmb_node->len = dmb->dmb_len;
+	dmb_node->cpu_addr = kzalloc(dmb_node->len, GFP_KERNEL |
+				     __GFP_NOWARN | __GFP_NORETRY |
+				     __GFP_NOMEMALLOC);
+	if (!dmb_node->cpu_addr) {
+		rc = -ENOMEM;
+		goto err_node;
+	}
+	dmb_node->dma_addr = SMC_DMA_ADDR_INVALID;
+
+again:
+	/* add new dmb into hash table */
+	get_random_bytes(&dmb_node->token, sizeof(dmb_node->token));
+	write_lock_bh(&ldev->dmb_ht_lock);
+	hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb_node->token) {
+		if (tmp_node->token == dmb_node->token) {
+			write_unlock_bh(&ldev->dmb_ht_lock);
+			goto again;
+		}
+	}
+	hash_add(ldev->dmb_ht, &dmb_node->list, dmb_node->token);
+	write_unlock_bh(&ldev->dmb_ht_lock);
+
+	dmb->sba_idx = dmb_node->sba_idx;
+	dmb->dmb_tok = dmb_node->token;
+	dmb->cpu_addr = dmb_node->cpu_addr;
+	dmb->dma_addr = dmb_node->dma_addr;
+	dmb->dmb_len = dmb_node->len;
+
+	return 0;
+
+err_node:
+	kfree(dmb_node);
+err_bit:
+	clear_bit(sba_idx, ldev->sba_idx_mask);
+	return rc;
+}
+
+static int smc_lo_unregister_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb)
+{
+	struct smc_lo_dmb_node *dmb_node = NULL, *tmp_node;
+	struct smc_lo_dev *ldev = smcd->priv;
+
+	/* remove dmb from hash table */
+	write_lock_bh(&ldev->dmb_ht_lock);
+	hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb->dmb_tok) {
+		if (tmp_node->token == dmb->dmb_tok) {
+			dmb_node = tmp_node;
+			break;
+		}
+	}
+	if (!dmb_node) {
+		write_unlock_bh(&ldev->dmb_ht_lock);
+		return -EINVAL;
+	}
+	hash_del(&dmb_node->list);
+	write_unlock_bh(&ldev->dmb_ht_lock);
+
+	clear_bit(dmb_node->sba_idx, ldev->sba_idx_mask);
+	kfree(dmb_node->cpu_addr);
+	kfree(dmb_node);
+
+	return 0;
+}
+
 static int smc_lo_add_vlan_id(struct smcd_dev *smcd, u64 vlan_id)
 {
 	return -EOPNOTSUPP;
@@ -75,6 +164,38 @@ static int smc_lo_signal_event(struct smcd_dev *dev, struct smcd_gid *rgid,
 	return 0;
 }
 
+static int smc_lo_move_data(struct smcd_dev *smcd, u64 dmb_tok,
+			    unsigned int idx, bool sf, unsigned int offset,
+			    void *data, unsigned int size)
+{
+	struct smc_lo_dmb_node *rmb_node = NULL, *tmp_node;
+	struct smc_lo_dev *ldev = smcd->priv;
+	struct smc_connection *conn;
+
+	read_lock_bh(&ldev->dmb_ht_lock);
+	hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb_tok) {
+		if (tmp_node->token == dmb_tok) {
+			rmb_node = tmp_node;
+			break;
+		}
+	}
+	if (!rmb_node) {
+		read_unlock_bh(&ldev->dmb_ht_lock);
+		return -EINVAL;
+	}
+	memcpy((char *)rmb_node->cpu_addr + offset, data, size);
+	read_unlock_bh(&ldev->dmb_ht_lock);
+
+	if (sf) {
+		conn = smcd->conn[rmb_node->sba_idx];
+		if (conn && !conn->killed)
+			tasklet_schedule(&conn->rx_tsklet);
+		else
+			return -EPIPE;
+	}
+	return 0;
+}
+
 static int smc_lo_supports_v2(void)
 {
 	return SMC_LO_V2_CAPABLE;
 }
@@ -101,14 +222,14 @@ static struct device *smc_lo_get_dev(struct smcd_dev *smcd)
 static const struct smcd_ops lo_ops = {
 	.query_remote_gid = smc_lo_query_rgid,
-	.register_dmb = NULL,
-	.unregister_dmb = NULL,
+	.register_dmb = smc_lo_register_dmb,
+	.unregister_dmb = smc_lo_unregister_dmb,
 	.add_vlan_id = smc_lo_add_vlan_id,
 	.del_vlan_id = smc_lo_del_vlan_id,
 	.set_vlan_required = smc_lo_set_vlan_required,
 	.reset_vlan_required = smc_lo_reset_vlan_required,
 	.signal_event = smc_lo_signal_event,
-	.move_data = NULL,
+	.move_data = smc_lo_move_data,
 	.supports_v2 = smc_lo_supports_v2,
 	.get_local_gid = smc_lo_get_local_gid,
 	.get_chid = smc_lo_get_chid,
@@ -173,6 +294,8 @@ static void smcd_lo_unregister_dev(struct smc_lo_dev *ldev)
 static int smc_lo_dev_init(struct smc_lo_dev *ldev)
 {
 	smc_lo_generate_ids(ldev);
+	rwlock_init(&ldev->dmb_ht_lock);
+	hash_init(ldev->dmb_ht);
 	return smcd_lo_register_dev(ldev);
 }
diff --git a/net/smc/smc_loopback.h b/net/smc/smc_loopback.h
index 11868e5ac732..6c4a390430f3 100644
--- a/net/smc/smc_loopback.h
+++ b/net/smc/smc_loopback.h
@@ -20,13 +20,26 @@
 #if IS_ENABLED(CONFIG_SMC_LO)
 #define SMC_LO_MAX_DMBS		5000
+#define SMC_LO_DMBS_HASH_BITS	12
 #define SMC_LO_RESERVED_CHID	0xFFFF
 
+struct smc_lo_dmb_node {
+	struct hlist_node list;
+	u64 token;
+	u32 len;
+	u32 sba_idx;
+	void *cpu_addr;
+	dma_addr_t dma_addr;
+};
+
 struct smc_lo_dev {
 	struct smcd_dev *smcd;
 	struct device dev;
 	u16 chid;
 	struct smcd_gid local_gid;
+	rwlock_t dmb_ht_lock;
+	DECLARE_BITMAP(sba_idx_mask, SMC_LO_MAX_DMBS);
+	DECLARE_HASHTABLE(dmb_ht, SMC_LO_DMBS_HASH_BITS);
 };
 #endif