Re: [PATCH rdma-next v2 1/1] RDMA/mana: Provide a modern CQ creation interface
From: Leon Romanovsky
Date: Wed Mar 04 2026 - 06:05:26 EST
On Tue, Mar 03, 2026 at 04:48:25AM -0800, Konstantin Taranov wrote:
> From: Konstantin Taranov <kotaranov@xxxxxxxxxxxxx>
>
> The uverbs CQ creation UAPI allows users to supply their own umem for a CQ.
> Update mana to support this workflow while preserving support for creating
> umem through the legacy interface.
>
> To support RDMA objects that own umem, extend mana_ib_create_queue() to return
> the umem to the caller and do not allocate umem if it was allocated
> by the caller.
>
> Signed-off-by: Konstantin Taranov <kotaranov@xxxxxxxxxxxxx>
> ---
> v2: It is a rework of the patch proposed by Leon
I am curious to know what changes were introduced.
> drivers/infiniband/hw/mana/cq.c | 125 +++++++++++++++++----------
> drivers/infiniband/hw/mana/device.c | 1 +
> drivers/infiniband/hw/mana/main.c | 30 +++++--
> drivers/infiniband/hw/mana/mana_ib.h | 5 +-
> drivers/infiniband/hw/mana/qp.c | 5 +-
> drivers/infiniband/hw/mana/wq.c | 3 +-
> 6 files changed, 111 insertions(+), 58 deletions(-)
>
> diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
> index b2749f971..fa951732a 100644
> --- a/drivers/infiniband/hw/mana/cq.c
> +++ b/drivers/infiniband/hw/mana/cq.c
> @@ -8,12 +8,8 @@
> int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
> struct uverbs_attr_bundle *attrs)
> {
> - struct ib_udata *udata = &attrs->driver_udata;
> struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
> - struct mana_ib_create_cq_resp resp = {};
> - struct mana_ib_ucontext *mana_ucontext;
> struct ib_device *ibdev = ibcq->device;
> - struct mana_ib_create_cq ucmd = {};
> struct mana_ib_dev *mdev;
> bool is_rnic_cq;
> u32 doorbell;
> @@ -26,48 +22,91 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
> cq->cq_handle = INVALID_MANA_HANDLE;
> is_rnic_cq = mana_ib_is_rnic(mdev);
>
> - if (udata) {
> - if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
> - return -EINVAL;
> -
> - err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
> - if (err) {
> - ibdev_dbg(ibdev, "Failed to copy from udata for create cq, %d\n", err);
> - return err;
> - }
> + if (attr->cqe > U32_MAX / COMP_ENTRY_SIZE / 2 + 1)
> + return -EINVAL;
We are talking about kernel verbs. ULPs are not designed to provide
attributes and recover from random driver limitations.
>
> - if ((!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) ||
> - attr->cqe > U32_MAX / COMP_ENTRY_SIZE) {
> - ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
> - return -EINVAL;
> - }
> + buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr->cqe * COMP_ENTRY_SIZE));
> + cq->cqe = buf_size / COMP_ENTRY_SIZE;
> + err = mana_ib_create_kernel_queue(mdev, buf_size, GDMA_CQ, &cq->queue);
> + if (err) {
> + ibdev_dbg(ibdev, "Failed to create kernel queue for create cq, %d\n", err);
> + return err;
> + }
> + doorbell = mdev->gdma_dev->doorbell;
>
> - cq->cqe = attr->cqe;
> - err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE,
> - &cq->queue);
> + if (is_rnic_cq) {
> + err = mana_ib_gd_create_cq(mdev, cq, doorbell);
> if (err) {
> - ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
> - return err;
> + ibdev_dbg(ibdev, "Failed to create RNIC cq, %d\n", err);
> + goto err_destroy_queue;
> }
>
> - mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
> - ibucontext);
> - doorbell = mana_ucontext->doorbell;
> - } else {
> - if (attr->cqe > U32_MAX / COMP_ENTRY_SIZE / 2 + 1) {
> - ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
> - return -EINVAL;
> - }
> - buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr->cqe * COMP_ENTRY_SIZE));
> - cq->cqe = buf_size / COMP_ENTRY_SIZE;
> - err = mana_ib_create_kernel_queue(mdev, buf_size, GDMA_CQ, &cq->queue);
> + err = mana_ib_install_cq_cb(mdev, cq);
> if (err) {
> - ibdev_dbg(ibdev, "Failed to create kernel queue for create cq, %d\n", err);
> - return err;
> + ibdev_dbg(ibdev, "Failed to install cq callback, %d\n", err);
> + goto err_destroy_rnic_cq;
> }
> - doorbell = mdev->gdma_dev->doorbell;
> }
>
> + spin_lock_init(&cq->cq_lock);
> + INIT_LIST_HEAD(&cq->list_send_qp);
> + INIT_LIST_HEAD(&cq->list_recv_qp);
> +
> + return 0;
> +
> +err_destroy_rnic_cq:
> + mana_ib_gd_destroy_cq(mdev, cq);
> +err_destroy_queue:
> + mana_ib_destroy_queue(mdev, &cq->queue);
> +
> + return err;
> +}
> +
> +int mana_ib_create_user_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
> + struct uverbs_attr_bundle *attrs)
> +{
> + struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
> + struct ib_udata *udata = &attrs->driver_udata;
> + struct mana_ib_create_cq_resp resp = {};
> + struct mana_ib_ucontext *mana_ucontext;
> + struct ib_device *ibdev = ibcq->device;
> + struct mana_ib_create_cq ucmd = {};
> + struct mana_ib_dev *mdev;
> + bool is_rnic_cq;
> + u32 doorbell;
> + int err;
> +
> + mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> +
> + cq->comp_vector = attr->comp_vector % ibdev->num_comp_vectors;
> + cq->cq_handle = INVALID_MANA_HANDLE;
> + is_rnic_cq = mana_ib_is_rnic(mdev);
> +
> + if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
> + return -EINVAL;
> +
> + err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
> + if (err) {
> + ibdev_dbg(ibdev, "Failed to copy from udata for create cq, %d\n", err);
> + return err;
> + }
> +
> + if ((!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) ||
> + attr->cqe > U32_MAX / COMP_ENTRY_SIZE)
> + return -EINVAL;
> +
> + cq->cqe = attr->cqe;
> + err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE,
> + &cq->queue, &ibcq->umem);
> + if (err) {
> + ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
> + return err;
> + }
> +
> + mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
> + ibucontext);
> + doorbell = mana_ucontext->doorbell;
> +
> if (is_rnic_cq) {
> err = mana_ib_gd_create_cq(mdev, cq, doorbell);
> if (err) {
> @@ -82,13 +121,11 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
> }
> }
>
> - if (udata) {
> - resp.cqid = cq->queue.id;
> - err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
> - if (err) {
> - ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
> - goto err_remove_cq_cb;
> - }
> + resp.cqid = cq->queue.id;
> + err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
> + if (err) {
> + ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
> + goto err_remove_cq_cb;
> }
>
> spin_lock_init(&cq->cq_lock);
> diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
> index ccc2279ca..c5c5fe051 100644
> --- a/drivers/infiniband/hw/mana/device.c
> +++ b/drivers/infiniband/hw/mana/device.c
> @@ -21,6 +21,7 @@ static const struct ib_device_ops mana_ib_dev_ops = {
> .alloc_ucontext = mana_ib_alloc_ucontext,
> .create_ah = mana_ib_create_ah,
> .create_cq = mana_ib_create_cq,
> + .create_user_cq = mana_ib_create_user_cq,
> .create_qp = mana_ib_create_qp,
> .create_rwq_ind_table = mana_ib_create_rwq_ind_table,
> .create_wq = mana_ib_create_wq,
> diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
> index 8d99cd00f..d1f1e217b 100644
> --- a/drivers/infiniband/hw/mana/main.c
> +++ b/drivers/infiniband/hw/mana/main.c
> @@ -262,33 +262,45 @@ int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_qu
> }
>
> int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
> - struct mana_ib_queue *queue)
> + struct mana_ib_queue *queue, struct ib_umem **umem)
> {
> - struct ib_umem *umem;
> int err;
>
> queue->umem = NULL;
> queue->id = INVALID_QUEUE_ID;
> queue->gdma_region = GDMA_INVALID_DMA_REGION;
>
> - umem = ib_umem_get(&mdev->ib_dev, addr, size, IB_ACCESS_LOCAL_WRITE);
> - if (IS_ERR(umem)) {
> - ibdev_dbg(&mdev->ib_dev, "Failed to get umem, %pe\n", umem);
> - return PTR_ERR(umem);
> + if (umem)
> + queue->umem = *umem;
> +
> + if (!queue->umem) {
> + /* if umem is not provided, allocate it */
> + queue->umem = ib_umem_get(&mdev->ib_dev, addr, size, IB_ACCESS_LOCAL_WRITE);
> + if (IS_ERR(queue->umem)) {
> + ibdev_dbg(&mdev->ib_dev, "Failed to get umem, %pe\n", queue->umem);
> + return PTR_ERR(queue->umem);
> + }
I moved this hunk to the callers on purpose. The idea is to call to
ib_umem_get() as early as possible.
> }
>
> - err = mana_ib_create_zero_offset_dma_region(mdev, umem, &queue->gdma_region);
> + err = mana_ib_create_zero_offset_dma_region(mdev, queue->umem, &queue->gdma_region);
> if (err) {
> ibdev_dbg(&mdev->ib_dev, "Failed to create dma region, %d\n", err);
> goto free_umem;
> }
> - queue->umem = umem;
>
> ibdev_dbg(&mdev->ib_dev, "created dma region 0x%llx\n", queue->gdma_region);
>
> + if (umem) {
> + /* Give ownership of umem to the caller */
> + *umem = queue->umem;
> + queue->umem = NULL;
> + }
> +
> return 0;
> free_umem:
> - ib_umem_release(umem);
> + if (!umem || !(*umem))
> + /* deallocate mana's umem */
> + ib_umem_release(queue->umem);
This is another reason why ib_umem_get() shouldn't be buried in the
driver's internals. IB/core is responsible to release it.
Thanks