On Mon, 2024-01-15 at 16:57 +0800, Gang Li wrote:
How about:
```
nid = global_nid;
list_for_each_entry(pw, &works, pw_list)
if (job->numa_aware) {
int old_node = nid;
queue_work_node(nid, system_unbound_wq, &pw->pw_work);
nid = next_node(nid, node_states[N_CPU]);
cmpxchg(&global_nid, old_node, nid);
} else
queue_work(system_unbound_wq, &pw->pw_work);
```
I am thinking of something like this:
static volatile atomic_t last_used_nid;
list_for_each_entry(pw, &works, pw_list)
if (job->numa_aware) {
int old_node = atomic_read(&last_used_nid);
do {
nid = next_node_in(old_node, node_states[N_CPU]);
} while (!atomic_try_cmpxchg(&last_used_nid, &old_node, nid));
queue_work_node(nid, system_unbound_wq, &pw->pw_work);
} else {
queue_work(system_unbound_wq, &pw->pw_work);
}
Note that we need to use next_node_in() rather than next_node(), so that we wrap around to the first node in the mask once we reach the end of the node mask.