On Mon, Jul 18, 2022 at 03:43:15PM +0300, Tariq Toukan wrote:
Reviewed-by: Gal Pressman <gal@xxxxxxxxxx>
Acked-by: Saeed Mahameed <saeedm@xxxxxxxxxx>
Signed-off-by: Tariq Toukan <tariqt@xxxxxxxxxx>
---
drivers/net/ethernet/mellanox/mlx5/core/eq.c | 62 +++++++++++++++++++-
1 file changed, 59 insertions(+), 3 deletions(-)
v2:
Separated the set_cpu operation into two functions, per Saeed's suggestion.
Added Saeed's Acked-by signature.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 229728c80233..e72bdaaad84f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -11,6 +11,9 @@
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif
+#ifdef CONFIG_NUMA
+#include <linux/sched/topology.h>
+#endif
#include "mlx5_core.h"
#include "lib/eq.h"
#include "fpga/core.h"
@@ -806,13 +809,67 @@ static void comp_irqs_release(struct mlx5_core_dev *dev)
kfree(table->comp_irqs);
}
+static void set_cpus_by_local_spread(struct mlx5_core_dev *dev, u16 *cpus,
+ int ncomp_eqs)
+{
+ int i;
+
+ for (i = 0; i < ncomp_eqs; i++)
+ cpus[i] = cpumask_local_spread(i, dev->priv.numa_node);
+}
+
+static bool set_cpus_by_numa_distance(struct mlx5_core_dev *dev, u16 *cpus,
+ int ncomp_eqs)
+{
+#ifdef CONFIG_NUMA
+ cpumask_var_t cpumask;
+ int first;
+ int i;
+
+ if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL)) {
+ mlx5_core_err(dev, "zalloc_cpumask_var failed\n");
+ return false;
+ }
+ cpumask_copy(cpumask, cpu_online_mask);
+
+ first = cpumask_local_spread(0, dev->priv.numa_node);
Arguably you want something like:
first = cpumask_any(cpumask_of_node(dev->priv.numa_node));
+
+ for (i = 0; i < ncomp_eqs; i++) {
+ int cpu;
+
+ cpu = sched_numa_find_closest(cpumask, first);
+ if (cpu >= nr_cpu_ids) {
+ mlx5_core_err(dev, "sched_numa_find_closest failed, cpu(%d) >= nr_cpu_ids(%d)\n",
+ cpu, nr_cpu_ids);
+
+ free_cpumask_var(cpumask);
+ return false;
So this will fail when ncomp_eqs > cpumask_weight(online_cpus); is that
desired?
+ }
+ cpus[i] = cpu;
+ cpumask_clear_cpu(cpu, cpumask);
Since there is no concurrency on this cpumask, you don't need atomic
ops:
__cpumask_clear_cpu(..);
+ }
+
+ free_cpumask_var(cpumask);
+ return true;
+#else
+ return false;
+#endif
+}
+
+static void mlx5_set_eqs_cpus(struct mlx5_core_dev *dev, u16 *cpus, int ncomp_eqs)
+{
+ bool success = set_cpus_by_numa_distance(dev, cpus, ncomp_eqs);
+
+ if (!success)
+ set_cpus_by_local_spread(dev, cpus, ncomp_eqs);
+}
+
static int comp_irqs_request(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
int ncomp_eqs = table->num_comp_eqs;
u16 *cpus;
int ret;
- int i;
ncomp_eqs = table->num_comp_eqs;
table->comp_irqs = kcalloc(ncomp_eqs, sizeof(*table->comp_irqs), GFP_KERNEL);
@@ -830,8 +887,7 @@ static int comp_irqs_request(struct mlx5_core_dev *dev)
ret = -ENOMEM;
goto free_irqs;
}
- for (i = 0; i < ncomp_eqs; i++)
- cpus[i] = cpumask_local_spread(i, dev->priv.numa_node);
+ mlx5_set_eqs_cpus(dev, cpus, ncomp_eqs);
So you change this for mlx5, what about the other users of
cpumask_local_spread() ?