RE: [PATCH v6 5/5] KVM: selftests: Allowing running dirty_log_perf_test on specific CPUs

From: Wang, Wei W
Date: Thu Oct 27 2022 - 08:04:28 EST


On Wednesday, October 26, 2022 11:44 PM, Sean Christopherson wrote:
> > I think it would be better to do the thread pinning at the time when
> > the thread is created by providing a pthread_attr_t attr, e.g. :
> >
> > pthread_attr_t attr;
> > cpu_set_t cpu_set;
> >
> > CPU_ZERO(&cpu_set);
> > CPU_SET(vcpu->pcpu, &cpu_set);
> > pthread_attr_init(&attr);
> > pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpu_set);
> > pthread_create(thread, &attr, ...);
> >
> > Also, pinning a vCPU thread to a pCPU is a general operation which
> > other users would need. I think we could make it more general and put
> > it to kvm_util.
>
> We could, but taking advantage of the pinning functionality would require
> plumbing a command line option for every test,

I think we could make this "pinning" optional (no need to force everyone
to use it).

> or alternatively adding partial
> command line parsing with a "hidden" global struct to kvm_selftest_init(),
> though handling error checking for a truly generic case would be a mess. Either
> way, extending pinning to other tests would require non-trivial effort, and can be
> done on top of this series.
>
> That said, it's also trivial to extract the pinning helpers to common code, and I
> can't think of any reason not to do that straightaway.
>
> Vipin, any objection to squashing the below diff with patch 5?
>
> > e.g. adding it to the helper function that I'm trying to create
>
> If we go this route in the future, we'd need to add a worker trampoline as the
> pinning needs to happen in the worker task itself to guarantee that the pinning
> takes effect before the worker does anything useful. That should be very
> doable.
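
For concreteness, I guess such a trampoline could look roughly like the
sketch below (made-up names, just to illustrate; it assumes the
kvm_pin_this_task_to_pcpu() helper from the diff further down):

struct vcpu_trampoline_args {
	int pcpu;			/* -1 means no pinning requested */
	void *(*real_fn)(void *);	/* the actual worker function */
	void *real_arg;
};

static void *vcpu_thread_trampoline(void *raw)
{
	struct vcpu_trampoline_args *args = raw;

	/*
	 * Pin first, so the affinity is guaranteed to be in effect
	 * before the worker does anything useful.
	 */
	if (args->pcpu != -1)
		kvm_pin_this_task_to_pcpu(args->pcpu);

	return args->real_fn(args->real_arg);
}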

The alternative way is the one I shared before, using this:

/* Thread created with attribute ATTR will be limited to run only on
   the processors represented in CPUSET. */
extern int pthread_attr_setaffinity_np (pthread_attr_t *__attr,
					size_t __cpusetsize,
					const cpu_set_t *__cpuset);

Basically, the thread is created directly on the pCPU the user specified.
I think this is better than "creating the thread on an arbitrary pCPU
and then pinning it to the user-specified pCPU in the thread's start routine".
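
Spelled out as a self-contained helper, that could be something like below
(hypothetical helper name, minimal error handling, just to illustrate):

#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdint.h>

/* Create the thread so that it starts life already pinned to @pcpu. */
static int create_pinned_thread(pthread_t *thread, uint32_t pcpu,
				void *(*fn)(void *), void *arg)
{
	pthread_attr_t attr;
	cpu_set_t cpuset;
	int r;

	CPU_ZERO(&cpuset);
	CPU_SET(pcpu, &cpuset);

	pthread_attr_init(&attr);
	r = pthread_attr_setaffinity_np(&attr, sizeof(cpuset), &cpuset);
	if (!r)
		r = pthread_create(thread, &attr, fn, arg);
	pthread_attr_destroy(&attr);

	return r;
}

This way the new thread is never scheduled on any other pCPU, not even
briefly during startup.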

>
> I do like the idea of extending __vcpu_thread_create(), but we can do that once
> __vcpu_thread_create() lands to avoid further delaying this series.

Sounds good. I can move some of this to vcpu_thread_create() once it's ready.

> struct perf_test_args {
> @@ -43,8 +41,12 @@ struct perf_test_args {
> bool nested;
> /* True if all vCPUs are pinned to pCPUs */
> bool pin_vcpus;
> + /* The vCPU=>pCPU pinning map. Only valid if pin_vcpus is true. */
> + uint32_t vcpu_to_pcpu[KVM_MAX_VCPUS];

How about putting the pCPU id into "struct kvm_vcpu"? (please see the code
posted below, which shows how that works). This is helpful when we later make
this more generic, as kvm_vcpu is used by everyone.

Probably we also don't need "bool pin_vcpus". We could initialize
pcpu_id to -1 to indicate that the vcpu doesn't need pinning (this is also
what I meant above by keeping it optional for other users).

Putting the whole thing together (tested and working fine), FYI:

diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index b30500cc197e..2829c98078d0 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -304,7 +304,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
int nr_vcpus = params->nr_vcpus;

vm = perf_test_create_vm(mode, nr_vcpus, params->vcpu_memory_bytes, 1,
- params->backing_src, !overlap_memory_access);
+ params->backing_src, !overlap_memory_access, NULL);

perf_test_start_vcpu_threads(nr_vcpus, vcpu_thread_main);

diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index dcdb6964b1dc..e19c3ce32c62 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -286,7 +286,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
int r, i;

vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
- p->src_type, p->partition_vcpu_memory_access);
+ p->src_type, p->partition_vcpu_memory_access, NULL);

demand_paging_size = get_backing_src_pagesz(p->src_type);

diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 35504b36b126..cbe7de28e094 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -132,6 +132,7 @@ struct test_params {
bool partition_vcpu_memory_access;
enum vm_mem_backing_src_type backing_src;
int slots;
+ char *pcpu_list;
};

static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable)
@@ -223,7 +224,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)

vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
p->slots, p->backing_src,
- p->partition_vcpu_memory_access);
+ p->partition_vcpu_memory_access,
+ p->pcpu_list);

perf_test_set_wr_fract(vm, p->wr_fract);

@@ -401,13 +403,13 @@ static void help(char *name)
int main(int argc, char *argv[])
{
int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
- const char *pcpu_list = NULL;
struct test_params p = {
.iterations = TEST_HOST_LOOP_N,
.wr_fract = 1,
.partition_vcpu_memory_access = true,
.backing_src = DEFAULT_VM_MEM_SRC,
.slots = 1,
+ .pcpu_list = NULL,
};
int opt;
@@ -424,7 +426,7 @@ int main(int argc, char *argv[])
guest_percpu_mem_size = parse_size(optarg);
break;
case 'c':
- pcpu_list = optarg;
+ p.pcpu_list = optarg;
break;
case 'e':
/* 'e' is for evil. */
@@ -471,9 +473,6 @@ int main(int argc, char *argv[])
}
}

- if (pcpu_list)
- perf_test_setup_pinning(pcpu_list, nr_vcpus);
-
TEST_ASSERT(p.iterations >= 2, "The test should have at least two iterations");

pr_info("Test iterations: %"PRIu64"\n", p.iterations);
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index c9286811a4cb..d403b374bae5 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -7,7 +7,11 @@
#ifndef SELFTEST_KVM_UTIL_H
#define SELFTEST_KVM_UTIL_H

+#include <pthread.h>
+
#include "kvm_util_base.h"
#include "ucall_common.h"

+void kvm_parse_vcpu_pinning(struct kvm_vcpu **vcpu, int nr_vcpus, const char *pcpus_string);
+
#endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index e42a09cd24a0..79867a478a81 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -47,6 +47,7 @@ struct userspace_mem_region {
struct kvm_vcpu {
struct list_head list;
uint32_t id;
+ int pcpu_id;
int fd;
struct kvm_vm *vm;
struct kvm_run *run;
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index ccfe3b9dc6bd..81428022bdb5 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -52,7 +52,8 @@ extern struct perf_test_args perf_test_args;
struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus,
uint64_t vcpu_memory_bytes, int slots,
enum vm_mem_backing_src_type backing_src,
- bool partition_vcpu_memory_access);
+ bool partition_vcpu_memory_access,
+ char *pcpu_list);
void perf_test_destroy_vm(struct kvm_vm *vm);

void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index f1cb1627161f..8acee6d4ccbe 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -1114,6 +1114,7 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)

vcpu->vm = vm;
vcpu->id = vcpu_id;
+ vcpu->pcpu_id = -1;
vcpu->fd = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(unsigned long)vcpu_id);
TEST_ASSERT(vcpu->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu->fd));

@@ -2021,3 +2022,58 @@ void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
break;
}
}
+
+void kvm_pin_this_task_to_pcpu(uint32_t pcpu)
+{
+ cpu_set_t mask;
+ int r;
+
+ CPU_ZERO(&mask);
+ CPU_SET(pcpu, &mask);
+ r = sched_setaffinity(0, sizeof(mask), &mask);
+ TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.\n", pcpu);
+}
+
+static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask)
+{
+ uint32_t pcpu = atoi_non_negative(cpu_str);
+
+ TEST_ASSERT(CPU_ISSET(pcpu, allowed_mask),
+ "Not allowed to run on pCPU '%d', check cgroups?\n", pcpu);
+ return pcpu;
+}
+
+void kvm_parse_vcpu_pinning(struct kvm_vcpu **vcpu, int nr_vcpus, const char *pcpus_string)
+{
+ cpu_set_t allowed_mask;
+ char *cpu, *cpu_list;
+ char delim[2] = ",";
+ int i, r;
+
+ if (!pcpus_string)
+ return;
+
+ cpu_list = strdup(pcpus_string);
+ TEST_ASSERT(cpu_list, "strdup() allocation failed.\n");
+
+ r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask);
+ TEST_ASSERT(!r, "sched_getaffinity() failed");
+
+ cpu = strtok(cpu_list, delim);
+
+ /* 1. Get all pcpus for vcpus. */
+ for (i = 0; i < nr_vcpus; i++) {
+ TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'\n", i);
+ vcpu[i]->pcpu_id = parse_pcpu(cpu, &allowed_mask);
+ cpu = strtok(NULL, delim);
+ }
+
+ /* 2. Check if the main worker needs to be pinned. */
+ if (cpu) {
+ kvm_pin_this_task_to_pcpu(parse_pcpu(cpu, &allowed_mask));
+ cpu = strtok(NULL, delim);
+ }
+
+ TEST_ASSERT(!cpu, "pCPU list contains trailing garbage characters '%s'", cpu);
+ free(cpu_list);
+}
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
index 520d1f896d61..95166c5a77f7 100644
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
@@ -112,7 +112,8 @@ void perf_test_setup_vcpus(struct kvm_vm *vm, int nr_vcpus,
struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus,
uint64_t vcpu_memory_bytes, int slots,
enum vm_mem_backing_src_type backing_src,
- bool partition_vcpu_memory_access)
+ bool partition_vcpu_memory_access,
+ char *pcpu_list)
{
struct perf_test_args *pta = &perf_test_args;
struct kvm_vm *vm;
@@ -157,7 +158,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus,
*/
vm = __vm_create_with_vcpus(mode, nr_vcpus, slot0_pages + guest_num_pages,
perf_test_guest_code, vcpus);
-
+ kvm_parse_vcpu_pinning(vcpus, nr_vcpus, pcpu_list);
pta->vm = vm;

/* Put the test region at the top guest physical memory. */
@@ -284,17 +285,29 @@ void perf_test_start_vcpu_threads(int nr_vcpus,
void (*vcpu_fn)(struct perf_test_vcpu_args *))
{
int i;
+ pthread_attr_t attr, *attr_p;
+ cpu_set_t cpuset;

vcpu_thread_fn = vcpu_fn;
WRITE_ONCE(all_vcpu_threads_running, false);

for (i = 0; i < nr_vcpus; i++) {
struct vcpu_thread *vcpu = &vcpu_threads[i];
+ attr_p = NULL;

vcpu->vcpu_idx = i;
WRITE_ONCE(vcpu->running, false);

- pthread_create(&vcpu->thread, NULL, vcpu_thread_main, vcpu);
+ if (vcpus[i]->pcpu_id != -1) {
+ CPU_ZERO(&cpuset);
+ CPU_SET(vcpus[i]->pcpu_id, &cpuset);
+ pthread_attr_init(&attr);
+ pthread_attr_setaffinity_np(&attr,
+ sizeof(cpu_set_t), &cpuset);
+ attr_p = &attr;
+ }
+
+ pthread_create(&vcpu->thread, attr_p, vcpu_thread_main, vcpu);
}
for (i = 0; i < nr_vcpus; i++) {
diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
index 9968800ec2ec..5dbe09537b2d 100644
--- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c
+++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
@@ -99,7 +99,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)

vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
VM_MEM_SRC_ANONYMOUS,
- p->partition_vcpu_memory_access);
+ p->partition_vcpu_memory_access, NULL);

pr_info("Finished creating vCPUs\n");