+ */
+void ib_process_cq_direct(struct ib_cq *cq)
+{
+ /* Warn (once) if this is called on a CQ whose poll_ctx is not IB_POLL_DIRECT. */
+ WARN_ON_ONCE(cq->poll_ctx != IB_POLL_DIRECT);
+
+ /*
+  * INT_MAX is passed as the budget argument. The value returned by
+  * __ib_process_cq() is discarded, so the caller learns nothing about
+  * how much work was done.
+  */
+ __ib_process_cq(cq, INT_MAX);
+}
+EXPORT_SYMBOL(ib_process_cq_direct);
My proposal is to drop this function and to export __ib_process_cq()
instead (with or without renaming). That will allow callers to compare
the poll budget with the number of completions that have been processed
and to use that information to decide whether or not to poll the CQ
again.
+/*
+ * Work handler for a CQ. The ib_cq is recovered from its embedded work
+ * member via container_of(), then processed with a budget of
+ * IB_POLL_BUDGET_WORKQUEUE.
+ */
+static void ib_cq_poll_work(struct work_struct *work)
+{
+ struct ib_cq *cq = container_of(work, struct ib_cq, work);
+ int completed;
+
+ completed = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE);
+ /*
+  * Requeue this work item when the returned count reached the budget,
+  * or when ib_req_notify_cq() returns a positive value (presumably
+  * meaning completions may have been missed while re-arming - confirm
+  * against the definition of IB_POLL_FLAGS). Because || short-circuits,
+  * ib_req_notify_cq() is only called when the count stayed below the
+  * budget.
+  */
+ if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
+ ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
+ queue_work(ib_comp_wq, &cq->work);
+}
+
+/*
+ * Queue cq->work on ib_comp_wq so that CQ processing happens in workqueue
+ * context; the private argument is not used. Presumably installed as the
+ * CQ's completion handler - confirm at the registration site.
+ */
+static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
+{
+ queue_work(ib_comp_wq, &cq->work);
+}
The above code will cause all polling to occur in the context of the CPU
that received the completion interrupt. This approach is not flexible
enough. For certain workloads throughput is higher if work completions
are processed by another CPU core on the same CPU socket. Has it been
considered to make the CPU core on which work completions are processed
configurable?