Re: [PATCH rdma-next 2/2] RDMA/core: Add a netevent notifier to cma

From: Jason Gunthorpe
Date: Tue May 10 2022 - 20:04:20 EST


On Mon, Apr 04, 2022 at 03:27:27PM +0300, Leon Romanovsky wrote:

> @@ -5054,10 +5061,95 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
> return ret;
> }
>
> +static void cma_netevent_work_handler(struct work_struct *_work)
> +{
> + struct cma_netevent_work *network =
> + container_of(_work, struct cma_netevent_work, work);
> + struct rdma_cm_event event = {};
> +
> + mutex_lock(&network->id_priv->handler_mutex);
> +
> + if (READ_ONCE(network->id_priv->state) == RDMA_CM_DESTROYING ||
> + READ_ONCE(network->id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
> + goto out_unlock;
> +
> + event.event = RDMA_CM_EVENT_UNREACHABLE;
> + event.status = -ETIMEDOUT;
> +
> + if (cma_cm_event_handler(network->id_priv, &event)) {
> + __acquire(&network->id_priv->handler_mutex);

?? (What is this __acquire() annotation for?)

> + network->id_priv->cm_id.ib = NULL;
> + cma_id_put(network->id_priv);
> + destroy_id_handler_unlock(network->id_priv);
> + kfree(network);
> + return;
> + }
> +
> +out_unlock:
> + mutex_unlock(&network->id_priv->handler_mutex);
> + cma_id_put(network->id_priv);
> + kfree(network);
> +}
> +
> +static int cma_netevent_callback(struct notifier_block *self,
> + unsigned long event, void *ctx)
> +{
> + struct id_table_entry *ips_node = NULL;
> + struct rdma_id_private *current_id;
> + struct cma_netevent_work *network;
> + struct neighbour *neigh = ctx;
> + unsigned long flags;
> +
> + if (event != NETEVENT_NEIGH_UPDATE)
> + return NOTIFY_DONE;
> +
> + spin_lock_irqsave(&id_table_lock, flags);
> + if (neigh->tbl->family == AF_INET6) {
> + struct sockaddr_in6 neigh_sock_6;
> +
> + neigh_sock_6.sin6_family = AF_INET6;
> + neigh_sock_6.sin6_addr = *(struct in6_addr *)neigh->primary_key;
> + ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
> + (struct sockaddr *)&neigh_sock_6);
> + } else if (neigh->tbl->family == AF_INET) {
> + struct sockaddr_in neigh_sock_4;
> +
> + neigh_sock_4.sin_family = AF_INET;
> + neigh_sock_4.sin_addr.s_addr = *(__be32 *)(neigh->primary_key);
> + ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
> + (struct sockaddr *)&neigh_sock_4);
> + } else
> + goto out;
> +
> + if (!ips_node)
> + goto out;
> +
> + list_for_each_entry(current_id, &ips_node->id_list, id_list_entry) {
> + if (!memcmp(current_id->id.route.addr.dev_addr.dst_dev_addr,
> + neigh->ha, ETH_ALEN))
> + continue;
> + network = kzalloc(sizeof(*network), GFP_ATOMIC);
> + if (!network)
> + goto out;
> +
> + INIT_WORK(&network->work, cma_netevent_work_handler);
> + network->id_priv = current_id;
> + cma_id_get(current_id);
> + queue_work(cma_netevent_wq, &network->work);

It is pretty ugly that we need to do an atomic allocation for every
matching id.

It would be better to add the work directly to the rdma_cm_id and just
waste that memory.

> + cma_netevent_wq = alloc_ordered_workqueue("rdma_cm_netevent", 0);
> + if (!cma_netevent_wq) {
> + ret = -ENOMEM;
> + goto err_netevent_wq;
> + }

Why do we need another WQ? Why does it need to be ordered?

Jason