[PATCH AUTOSEL 5.10 22/31] RDMA/rxe: Limit the number of calls to each tasklet

From: Sasha Levin
Date: Sun Aug 14 2022 - 11:50:28 EST


From: Bob Pearson <rpearsonhpe@xxxxxxxxx>

[ Upstream commit eff6d998ca297cb0b2e53b032a56cf8e04dd8b17 ]

Limit the maximum number of calls to each tasklet from rxe_do_task()
before yielding the cpu. When the limit is reached reschedule the tasklet
and exit the calling loop. This patch prevents one tasklet from consuming
100% of a cpu core and causing a deadlock or soft lockup.

Link: https://lore.kernel.org/r/20220630190425.2251-9-rpearsonhpe@xxxxxxxxx
Signed-off-by: Bob Pearson <rpearsonhpe@xxxxxxxxx>
Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>
---
drivers/infiniband/sw/rxe/rxe_param.h | 6 ++++++
drivers/infiniband/sw/rxe/rxe_task.c | 16 ++++++++++++----
2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index f9fb56ec6dfd..dca86422b0a2 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -98,6 +98,12 @@ enum rxe_device_param {
RXE_INFLIGHT_SKBS_PER_QP_HIGH = 64,
RXE_INFLIGHT_SKBS_PER_QP_LOW = 16,

+ /* Max number of interations of each tasklet
+ * before yielding the cpu to let other
+ * work make progress
+ */
+ RXE_MAX_ITERATIONS = 1024,
+
/* Delay before calling arbiter timer */
RXE_NSEC_ARB_TIMER_DELAY = 200,

diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
index 6951fdcb31bf..568cf56c236b 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.c
+++ b/drivers/infiniband/sw/rxe/rxe_task.c
@@ -8,7 +8,7 @@
#include <linux/interrupt.h>
#include <linux/hardirq.h>

-#include "rxe_task.h"
+#include "rxe.h"

int __rxe_do_task(struct rxe_task *task)

@@ -34,6 +34,7 @@ void rxe_do_task(struct tasklet_struct *t)
int ret;
unsigned long flags;
struct rxe_task *task = from_tasklet(task, t, tasklet);
+ unsigned int iterations = RXE_MAX_ITERATIONS;

spin_lock_irqsave(&task->state_lock, flags);
switch (task->state) {
@@ -62,13 +63,20 @@ void rxe_do_task(struct tasklet_struct *t)
spin_lock_irqsave(&task->state_lock, flags);
switch (task->state) {
case TASK_STATE_BUSY:
- if (ret)
+ if (ret) {
task->state = TASK_STATE_START;
- else
+ } else if (iterations--) {
cont = 1;
+ } else {
+ /* reschedule the tasklet and exit
+ * the loop to give up the cpu
+ */
+ tasklet_schedule(&task->tasklet);
+ task->state = TASK_STATE_START;
+ }
break;

- /* soneone tried to run the task since the last time we called
+ /* someone tried to run the task since the last time we called
* func, so we will call one more time regardless of the
* return value
*/
--
2.35.1