[PATCH 5.12 015/127] RDMA/rxe: Return CQE error if invalid lkey was supplied

From: Greg Kroah-Hartman
Date: Mon May 24 2021 - 12:01:59 EST


From: Leon Romanovsky <leonro@xxxxxxxxxx>

[ Upstream commit dc07628bd2bbc1da768e265192c28ebd301f509d ]

RXE is missing update of WQE status in LOCAL_WRITE failures. This caused
the following kernel panic if someone sent an atomic operation with an
explicitly wrong lkey.

[leonro@vm ~]$ mkt test
test_atomic_invalid_lkey (tests.test_atomic.AtomicTest) ...
WARNING: CPU: 5 PID: 263 at drivers/infiniband/sw/rxe/rxe_comp.c:740 rxe_completer+0x1a6d/0x2e30 [rdma_rxe]
Modules linked in: crc32_generic rdma_rxe ip6_udp_tunnel udp_tunnel rdma_ucm rdma_cm ib_umad ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core mlx5_core ptp pps_core
CPU: 5 PID: 263 Comm: python3 Not tainted 5.13.0-rc1+ #2936
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
RIP: 0010:rxe_completer+0x1a6d/0x2e30 [rdma_rxe]
Code: 03 0f 8e 65 0e 00 00 3b 93 10 06 00 00 0f 84 82 0a 00 00 4c 89 ff 4c 89 44 24 38 e8 2d 74 a9 e1 4c 8b 44 24 38 e9 1c f5 ff ff <0f> 0b e9 0c e8 ff ff b8 05 00 00 00 41 bf 05 00 00 00 e9 ab e7 ff
RSP: 0018:ffff8880158af090 EFLAGS: 00010246
RAX: 0000000000000000 RBX: ffff888016a78000 RCX: ffffffffa0cf1652
RDX: 1ffff9200004b442 RSI: 0000000000000004 RDI: ffffc9000025a210
RBP: dffffc0000000000 R08: 00000000ffffffea R09: ffff88801617740b
R10: ffffed1002c2ee81 R11: 0000000000000007 R12: ffff88800f3b63e8
R13: ffff888016a78008 R14: ffffc9000025a180 R15: 000000000000000c
FS: 00007f88b622a740(0000) GS:ffff88806d540000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f88b5a1fa10 CR3: 000000000d848004 CR4: 0000000000370ea0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
rxe_do_task+0x130/0x230 [rdma_rxe]
rxe_rcv+0xb11/0x1df0 [rdma_rxe]
rxe_loopback+0x157/0x1e0 [rdma_rxe]
rxe_responder+0x5532/0x7620 [rdma_rxe]
rxe_do_task+0x130/0x230 [rdma_rxe]
rxe_rcv+0x9c8/0x1df0 [rdma_rxe]
rxe_loopback+0x157/0x1e0 [rdma_rxe]
rxe_requester+0x1efd/0x58c0 [rdma_rxe]
rxe_do_task+0x130/0x230 [rdma_rxe]
rxe_post_send+0x998/0x1860 [rdma_rxe]
ib_uverbs_post_send+0xd5f/0x1220 [ib_uverbs]
ib_uverbs_write+0x847/0xc80 [ib_uverbs]
vfs_write+0x1c5/0x840
ksys_write+0x176/0x1d0
do_syscall_64+0x3f/0x80
entry_SYSCALL_64_after_hwframe+0x44/0xae

Fixes: 8700e3e7c485 ("Soft RoCE driver")
Link: https://lore.kernel.org/r/11e7b553f3a6f5371c6bb3f57c494bb52b88af99.1620711734.git.leonro@xxxxxxxxxx
Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxx>
Acked-by: Zhu Yanjun <zyjzyj2000@xxxxxxxxx>
Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>
---
drivers/infiniband/sw/rxe/rxe_comp.c | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index a612b335baa0..06b556169867 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -346,13 +346,15 @@ static inline enum comp_state do_read(struct rxe_qp *qp,
ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE,
&wqe->dma, payload_addr(pkt),
payload_size(pkt), to_mr_obj, NULL);
- if (ret)
+ if (ret) {
+ wqe->status = IB_WC_LOC_PROT_ERR;
return COMPST_ERROR;
+ }

if (wqe->dma.resid == 0 && (pkt->mask & RXE_END_MASK))
return COMPST_COMP_ACK;
- else
- return COMPST_UPDATE_COMP;
+
+ return COMPST_UPDATE_COMP;
}

static inline enum comp_state do_atomic(struct rxe_qp *qp,
@@ -366,10 +368,12 @@ static inline enum comp_state do_atomic(struct rxe_qp *qp,
ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE,
&wqe->dma, &atomic_orig,
sizeof(u64), to_mr_obj, NULL);
- if (ret)
+ if (ret) {
+ wqe->status = IB_WC_LOC_PROT_ERR;
return COMPST_ERROR;
- else
- return COMPST_COMP_ACK;
+ }
+
+ return COMPST_COMP_ACK;
}

static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
--
2.30.2