Re: [PATCH v5] kvm: make vcpu life cycle separated from kvm instance

From: Liu ping fan
Date: Tue Dec 27 2011 - 02:54:05 EST

Next message: Adrian Hunter: "Re: [PATCH] intel_mid_dma: remove legacy pm interface"
Previous message: Richard Cochran: "Re: [PATCH 0/4] clock_gettime_ns and x86-64 optimizations"
In reply to: Gleb Natapov: "Re: [PATCH v5] kvm: make vcpu life cycle separated from kvm instance"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

On Mon, Dec 26, 2011 at 7:09 PM, Gleb Natapov <gleb@xxxxxxxxxx> wrote:
> On Sat, Dec 17, 2011 at 11:19:35AM +0800, Liu Ping Fan wrote:
>> From: Liu Ping Fan <pingfank@xxxxxxxxxxxxxxxxxx>
>>
>> Currently, a vcpu can be destroyed only when the kvm instance is destroyed.
>> Change this so that vcpus are destroyed before the kvm instance: a vcpu
>> MUST and CAN be destroyed before the kvm instance is destroyed.
>>
>> Signed-off-by: Liu Ping Fan <pingfank@xxxxxxxxxxxxxxxxxx>
>> ---
>>  arch/x86/kvm/i8254.c     |   10 +++--
>>  arch/x86/kvm/i8259.c     |   12 ++++--
>>  arch/x86/kvm/x86.c       |   53 +++++++++++------------
>>  include/linux/kvm_host.h |   20 ++++-----
>>  virt/kvm/irq_comm.c      |    6 ++-
>>  virt/kvm/kvm_main.c      |  106 ++++++++++++++++++++++++++++++++++-----------
>>  6 files changed, 132 insertions(+), 75 deletions(-)
>>
>> diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
>> index 76e3f1c..a3a5506 100644
>> --- a/arch/x86/kvm/i8254.c
>> +++ b/arch/x86/kvm/i8254.c
>> @@ -289,9 +289,8 @@ static void pit_do_work(struct work_struct *work)
>> Â Â Â struct kvm_pit *pit = container_of(work, struct kvm_pit, expired);
>> Â Â Â struct kvm *kvm = pit->kvm;
>> Â Â Â struct kvm_vcpu *vcpu;
>> - Â Â int i;
>> Â Â Â struct kvm_kpit_state *ps = &pit->pit_state;
>> - Â Â int inject = 0;
>> + Â Â int idx, inject = 0;
>>
>> Â Â Â /* Try to inject pending interrupts when
>> Â Â Â Â* last one has been acked.
>> @@ -315,9 +314,12 @@ static void pit_do_work(struct work_struct *work)
>> Â Â Â Â Â Â Â Â* LVT0 to NMI delivery. Other PIC interrupts are just sent to
>> Â Â Â Â Â Â Â Â* VCPU0, and only if its LVT0 is in EXTINT mode.
>> Â Â Â Â Â Â Â Â*/
>> - Â Â Â Â Â Â if (kvm->arch.vapics_in_nmi_mode > 0)
>> - Â Â Â Â Â Â Â Â Â Â kvm_for_each_vcpu(i, vcpu, kvm)
>> + Â Â Â Â Â Â if (kvm->arch.vapics_in_nmi_mode > 0) {
>> + Â Â Â Â Â Â Â Â Â Â idx = srcu_read_lock(&kvm->srcu_vcpus);
>> + Â Â Â Â Â Â Â Â Â Â kvm_for_each_vcpu(vcpu, kvm)
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â kvm_apic_nmi_wd_deliver(vcpu);
>> + Â Â Â Â Â Â Â Â Â Â srcu_read_unlock(&kvm->srcu_vcpus, idx);
>> + Â Â Â Â Â Â }
>> Â Â Â }
>> Â}
>>
>> diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
>> index cac4746..5ef5c05 100644
>> --- a/arch/x86/kvm/i8259.c
>> +++ b/arch/x86/kvm/i8259.c
>> @@ -50,25 +50,29 @@ static void pic_unlock(struct kvm_pic *s)
>> Â{
>> Â Â Â bool wakeup = s->wakeup_needed;
>> Â Â Â struct kvm_vcpu *vcpu, *found = NULL;
>> - Â Â int i;
>> + Â Â struct kvm *kvm = s->kvm;
>> + Â Â int idx;
>>
>> Â Â Â s->wakeup_needed = false;
>>
>> Â Â Â spin_unlock(&s->lock);
>>
>> Â Â Â if (wakeup) {
>> - Â Â Â Â Â Â kvm_for_each_vcpu(i, vcpu, s->kvm) {
>> + Â Â Â Â Â Â idx = srcu_read_lock(&kvm->srcu_vcpus);
>> + Â Â Â Â Â Â kvm_for_each_vcpu(vcpu, kvm)
>> Â Â Â Â Â Â Â Â Â Â Â if (kvm_apic_accept_pic_intr(vcpu)) {
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â found = vcpu;
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â break;
>> Â Â Â Â Â Â Â Â Â Â Â }
>> - Â Â Â Â Â Â }
>>
>> - Â Â Â Â Â Â if (!found)
>> + Â Â Â Â Â Â if (!found) {
>> + Â Â Â Â Â Â Â Â Â Â srcu_read_unlock(&kvm->srcu_vcpus, idx);
>> Â Â Â Â Â Â Â Â Â Â Â return;
>> + Â Â Â Â Â Â }
>>
>> Â Â Â Â Â Â Â kvm_make_request(KVM_REQ_EVENT, found);
>> Â Â Â Â Â Â Â kvm_vcpu_kick(found);
>> + Â Â Â Â Â Â srcu_read_unlock(&kvm->srcu_vcpus, idx);
>> Â Â Â }
>> Â}
>>
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 23c93fe..b79739d 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -1774,14 +1774,20 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>> Âstatic int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>> Â{
>> Â Â Â u64 data = 0;
>> + Â Â int idx;
>>
>> Â Â Â switch (msr) {
>> Â Â Â case HV_X64_MSR_VP_INDEX: {
>> - Â Â Â Â Â Â int r;
>> + Â Â Â Â Â Â int r = 0;
>> Â Â Â Â Â Â Â struct kvm_vcpu *v;
>> - Â Â Â Â Â Â kvm_for_each_vcpu(r, v, vcpu->kvm)
>> + Â Â Â Â Â Â struct kvm *kvm = vcpu->kvm;
>> + Â Â Â Â Â Â idx = srcu_read_lock(&kvm->srcu_vcpus);
>> + Â Â Â Â Â Â kvm_for_each_vcpu(v, vcpu->kvm) {
>> Â Â Â Â Â Â Â Â Â Â Â if (v == vcpu)
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â data = r;
>> + Â Â Â Â Â Â Â Â Â Â r++;
>> + Â Â Â Â Â Â }
>> + Â Â Â Â Â Â srcu_read_unlock(&kvm->srcu_vcpus, idx);
>> Â Â Â Â Â Â Â break;
>> Â Â Â }
>> Â Â Â case HV_X64_MSR_EOI:
>> @@ -4529,7 +4535,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
>> Â Â Â struct cpufreq_freqs *freq = data;
>> Â Â Â struct kvm *kvm;
>> Â Â Â struct kvm_vcpu *vcpu;
>> - Â Â int i, send_ipi = 0;
>> + Â Â int idx, send_ipi = 0;
>>
>> Â Â Â /*
>> Â Â Â Â* We allow guests to temporarily run on slowing clocks,
>> @@ -4579,13 +4585,16 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
>>
>> Â Â Â raw_spin_lock(&kvm_lock);
>> Â Â Â list_for_each_entry(kvm, &vm_list, vm_list) {
>> - Â Â Â Â Â Â kvm_for_each_vcpu(i, vcpu, kvm) {
>> + Â Â Â Â Â Â idx = srcu_read_lock(&kvm->srcu_vcpus);
>> + Â Â Â Â Â Â kvm_for_each_vcpu(vcpu, kvm) {
>> Â Â Â Â Â Â Â Â Â Â Â if (vcpu->cpu != freq->cpu)
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â continue;
>> Â Â Â Â Â Â Â Â Â Â Â kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
>> Â Â Â Â Â Â Â Â Â Â Â if (vcpu->cpu != smp_processor_id())
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â send_ipi = 1;
>> Â Â Â Â Â Â Â }
>> + Â Â Â Â Â Â srcu_read_unlock(&kvm->srcu_vcpus, idx);
>> +
>> Â Â Â }
>> Â Â Â raw_spin_unlock(&kvm_lock);
>>
>> @@ -5866,13 +5875,17 @@ int kvm_arch_hardware_enable(void *garbage)
>> Â{
>> Â Â Â struct kvm *kvm;
>> Â Â Â struct kvm_vcpu *vcpu;
>> - Â Â int i;
>> + Â Â int idx;
>>
>> Â Â Â kvm_shared_msr_cpu_online();
>> - Â Â list_for_each_entry(kvm, &vm_list, vm_list)
>> - Â Â Â Â Â Â kvm_for_each_vcpu(i, vcpu, kvm)
>> + Â Â list_for_each_entry(kvm, &vm_list, vm_list) {
>> + Â Â Â Â Â Â idx = srcu_read_lock(&kvm->srcu_vcpus);
>> + Â Â Â Â Â Â kvm_for_each_vcpu(vcpu, kvm) {
>> Â Â Â Â Â Â Â Â Â Â Â if (vcpu->cpu == smp_processor_id())
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
>> + Â Â Â Â Â Â }
>> + Â Â Â Â Â Â srcu_read_unlock(&kvm->srcu_vcpus, idx);
>> + Â Â }
>> Â Â Â return kvm_x86_ops->hardware_enable(garbage);
>> Â}
>>
>> @@ -5989,27 +6002,14 @@ static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
>> Â Â Â vcpu_put(vcpu);
>> Â}
>>
>> -static void kvm_free_vcpus(struct kvm *kvm)
>> +void kvm_arch_vcpu_zap(struct kvm_vcpu *vcpu)
>> Â{
>> - Â Â unsigned int i;
>> - Â Â struct kvm_vcpu *vcpu;
>> -
>> - Â Â /*
>> - Â Â Â* Unpin any mmu pages first.
>> - Â Â Â*/
>> - Â Â kvm_for_each_vcpu(i, vcpu, kvm) {
>> - Â Â Â Â Â Â kvm_clear_async_pf_completion_queue(vcpu);
>> - Â Â Â Â Â Â kvm_unload_vcpu_mmu(vcpu);
>> - Â Â }
>> - Â Â kvm_for_each_vcpu(i, vcpu, kvm)
>> - Â Â Â Â Â Â kvm_arch_vcpu_free(vcpu);
>> -
>> - Â Â mutex_lock(&kvm->lock);
>> - Â Â for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
>> - Â Â Â Â Â Â kvm->vcpus[i] = NULL;
>> + Â Â struct kvm *kvm = vcpu->kvm;
>>
>> - Â Â atomic_set(&kvm->online_vcpus, 0);
>> - Â Â mutex_unlock(&kvm->lock);
>> + Â Â kvm_clear_async_pf_completion_queue(vcpu);
>> + Â Â kvm_unload_vcpu_mmu(vcpu);
>> + Â Â kvm_arch_vcpu_free(vcpu);
>> + Â Â kvm_put_kvm(kvm);
>> Â}
>>
>> Âvoid kvm_arch_sync_events(struct kvm *kvm)
>> @@ -6023,7 +6023,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
>> Â Â Â kvm_iommu_unmap_guest(kvm);
>> Â Â Â kfree(kvm->arch.vpic);
>> Â Â Â kfree(kvm->arch.vioapic);
>> - Â Â kvm_free_vcpus(kvm);
>> Â Â Â if (kvm->arch.apic_access_page)
>> Â Â Â Â Â Â Â put_page(kvm->arch.apic_access_page);
>> Â Â Â if (kvm->arch.ept_identity_pagetable)
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index 8c5c303..ab22828 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -115,6 +115,7 @@ enum {
>>
>> Âstruct kvm_vcpu {
>> Â Â Â struct kvm *kvm;
>> + Â Â struct list_head list;
>> Â#ifdef CONFIG_PREEMPT_NOTIFIERS
>> Â Â Â struct preempt_notifier preempt_notifier;
>> Â#endif
>> @@ -249,13 +250,15 @@ struct kvm {
>> Â Â Â struct mm_struct *mm; /* userspace tied to this vm */
>> Â Â Â struct kvm_memslots *memslots;
>> Â Â Â struct srcu_struct srcu;
>> + Â Â struct srcu_struct srcu_vcpus;
>> +
>> Â#ifdef CONFIG_KVM_APIC_ARCHITECTURE
>> Â Â Â u32 bsp_vcpu_id;
>> Â Â Â struct kvm_vcpu *bsp_vcpu;
> Rebase to latest kvm.git.
>
>> Â#endif
>> - Â Â struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
>> + Â Â struct list_head vcpus;
>> Â Â Â atomic_t online_vcpus;
>> - Â Â int last_boosted_vcpu;
>> + Â Â struct kvm_vcpu *last_boosted_vcpu;
>> Â Â Â struct list_head vm_list;
>> Â Â Â struct mutex lock;
>> Â Â Â struct kvm_io_bus *buses[KVM_NR_BUSES];
>> @@ -302,17 +305,10 @@ struct kvm {
>> Â#define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt)
>> Â#define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt)
>>
>> -static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
>> -{
>> - Â Â smp_rmb();
>> - Â Â return kvm->vcpus[i];
>> -}
>> +void kvm_arch_vcpu_zap(struct kvm_vcpu *vcpu);
>>
>> -#define kvm_for_each_vcpu(idx, vcpup, kvm) \
>> - Â Â for (idx = 0; \
>> - Â Â Â Â Âidx < atomic_read(&kvm->online_vcpus) && \
>> - Â Â Â Â Â(vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \
>> - Â Â Â Â Âidx++)
>> +#define kvm_for_each_vcpu(vcpu, kvm) \
>> + Â Â list_for_each_entry_rcu(vcpu, &kvm->vcpus, list)
>>
>> Â#define kvm_for_each_memslot(memslot, slots) \
>> Â Â Â for (memslot = &slots->memslots[0]; Â Â \
>> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
>> index 9f614b4..78dc97c 100644
>> --- a/virt/kvm/irq_comm.c
>> +++ b/virt/kvm/irq_comm.c
>> @@ -81,14 +81,15 @@ inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
>> Âint kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
>> Â Â Â Â Â Â Â struct kvm_lapic_irq *irq)
>> Â{
>> - Â Â int i, r = -1;
>> + Â Â int idx, r = -1;
>> Â Â Â struct kvm_vcpu *vcpu, *lowest = NULL;
>>
>> Â Â Â if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
>> Â Â Â Â Â Â Â Â Â Â Â kvm_is_dm_lowest_prio(irq))
>> Â Â Â Â Â Â Â printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
>>
>> - Â Â kvm_for_each_vcpu(i, vcpu, kvm) {
>> + Â Â idx = srcu_read_lock(&kvm->srcu_vcpus);
>> + Â Â kvm_for_each_vcpu(vcpu, kvm) {
>> Â Â Â Â Â Â Â if (!kvm_apic_present(vcpu))
>> Â Â Â Â Â Â Â Â Â Â Â continue;
>>
>> @@ -111,6 +112,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
>> Â Â Â if (lowest)
>> Â Â Â Â Â Â Â r = kvm_apic_set_irq(lowest, irq);
>>
>> + Â Â srcu_read_unlock(&kvm->srcu_vcpus, idx);
>> Â Â Â return r;
>> Â}
>>
>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>> index e289486..ec0c920 100644
>> --- a/virt/kvm/kvm_main.c
>> +++ b/virt/kvm/kvm_main.c
>> @@ -171,7 +171,7 @@ static void ack_flush(void *_completed)
>>
>> Âstatic bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
>> Â{
>> - Â Â int i, cpu, me;
>> + Â Â int cpu, me, idx;
>> Â Â Â cpumask_var_t cpus;
>> Â Â Â bool called = true;
>> Â Â Â struct kvm_vcpu *vcpu;
>> @@ -179,7 +179,8 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
>> Â Â Â zalloc_cpumask_var(&cpus, GFP_ATOMIC);
>>
>> Â Â Â me = get_cpu();
>> - Â Â kvm_for_each_vcpu(i, vcpu, kvm) {
>> + Â Â idx = srcu_read_lock(&kvm->srcu_vcpus);
>> + Â Â kvm_for_each_vcpu(vcpu, kvm) {
>> Â Â Â Â Â Â Â kvm_make_request(req, vcpu);
>> Â Â Â Â Â Â Â cpu = vcpu->cpu;
>>
>> @@ -190,12 +191,15 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
>> Â Â Â Â Â Â Â Â Â Â kvm_vcpu_exiting_guest_mode(vcpu) != OUTSIDE_GUEST_MODE)
>> Â Â Â Â Â Â Â Â Â Â Â cpumask_set_cpu(cpu, cpus);
>> Â Â Â }
>> + Â Â srcu_read_unlock(&kvm->srcu_vcpus, idx);
>> +
>> Â Â Â if (unlikely(cpus == NULL))
>> Â Â Â Â Â Â Â smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1);
>> Â Â Â else if (!cpumask_empty(cpus))
>> Â Â Â Â Â Â Â smp_call_function_many(cpus, ack_flush, NULL, 1);
>> Â Â Â else
>> Â Â Â Â Â Â Â called = false;
>> +
>> Â Â Â put_cpu();
>> Â Â Â free_cpumask_var(cpus);
>> Â Â Â return called;
>> @@ -477,6 +481,8 @@ static struct kvm *kvm_create_vm(void)
>> Â Â Â kvm_init_memslots_id(kvm);
>> Â Â Â if (init_srcu_struct(&kvm->srcu))
>> Â Â Â Â Â Â Â goto out_err_nosrcu;
>> + Â Â if (init_srcu_struct(&kvm->srcu_vcpus))
>> + Â Â Â Â Â Â goto out_err_nosrcu_vcpus;
>> Â Â Â for (i = 0; i < KVM_NR_BUSES; i++) {
>> Â Â Â Â Â Â Â kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â GFP_KERNEL);
>> @@ -500,10 +506,13 @@ static struct kvm *kvm_create_vm(void)
>> Â Â Â raw_spin_lock(&kvm_lock);
>> Â Â Â list_add(&kvm->vm_list, &vm_list);
>> Â Â Â raw_spin_unlock(&kvm_lock);
>> + Â Â INIT_LIST_HEAD(&kvm->vcpus);
>>
>> Â Â Â return kvm;
>>
>> Âout_err:
>> + Â Â cleanup_srcu_struct(&kvm->srcu_vcpus);
>> +out_err_nosrcu_vcpus:
>> Â Â Â cleanup_srcu_struct(&kvm->srcu);
>> Âout_err_nosrcu:
>> Â Â Â hardware_disable_all();
>> @@ -587,6 +596,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
>> Â Â Â kvm_arch_destroy_vm(kvm);
>> Â Â Â kvm_free_physmem(kvm);
>> Â Â Â cleanup_srcu_struct(&kvm->srcu);
>> + Â Â cleanup_srcu_struct(&kvm->srcu_vcpus);
>> Â Â Â kvm_arch_free_vm(kvm);
>> Â Â Â hardware_disable_all();
>> Â Â Â mmdrop(mm);
>> @@ -1593,11 +1603,9 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
>> Â{
>> Â Â Â struct kvm *kvm = me->kvm;
>> Â Â Â struct kvm_vcpu *vcpu;
>> - Â Â int last_boosted_vcpu = me->kvm->last_boosted_vcpu;
>> - Â Â int yielded = 0;
>> - Â Â int pass;
>> - Â Â int i;
>> -
>> + Â Â struct task_struct *task = NULL;
>> + Â Â struct pid *pid;
>> + Â Â int pass, firststart, lastone, yielded, idx;
>> Â Â Â /*
>> Â Â Â Â* We boost the priority of a VCPU that is runnable but not
>> Â Â Â Â* currently running, because it got preempted by something
>> @@ -1605,15 +1613,22 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
>> Â Â Â Â* VCPU is holding the lock that we need and will release it.
>> Â Â Â Â* We approximate round-robin by starting at the last boosted VCPU.
>> Â Â Â Â*/
>> - Â Â for (pass = 0; pass < 2 && !yielded; pass++) {
>> - Â Â Â Â Â Â kvm_for_each_vcpu(i, vcpu, kvm) {
>> - Â Â Â Â Â Â Â Â Â Â struct task_struct *task = NULL;
>> - Â Â Â Â Â Â Â Â Â Â struct pid *pid;
>> - Â Â Â Â Â Â Â Â Â Â if (!pass && i < last_boosted_vcpu) {
>> - Â Â Â Â Â Â Â Â Â Â Â Â Â Â i = last_boosted_vcpu;
>> + Â Â for (pass = 0, firststart = 0; pass < 2 && !yielded; pass++) {
>> +
>> + Â Â Â Â Â Â idx = srcu_read_lock(&kvm->srcu_vcpus);
>> + Â Â Â Â Â Â kvm_for_each_vcpu(vcpu, kvm) {
>> + Â Â Â Â Â Â Â Â Â Â if (!pass && !firststart &&
>> + Â Â Â Â Â Â Â Â Â Â Â Â vcpu != kvm->last_boosted_vcpu &&
>> + Â Â Â Â Â Â Â Â Â Â Â Â kvm->last_boosted_vcpu != NULL) {
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â vcpu = kvm->last_boosted_vcpu;
> You access last_boosted_vcpu as if it is protected by srcu, but it
> isn't. kvm_vcpu_release() changes it after the
> synchronize_srcu_expedited() call.
>
Oh, I get it. That opens a window in which a vcpu that has already been
reclaimed can still be accessed.

> I do not like this last_boosted_vcpu pointer much. Maybe we can get rid of
> it by remembering the last apic_id and searching for it each time we enter
> the function. I do not think this function is too performance sensitive.
> We enter here when a vcpu is spinning anyway.
>
Fine. I find it very hard to protect both the rcu_list and this
pointer at the same time, and vcpu_id gives me a way out.

Thanks and regards,
ping fan
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â firststart = 1;
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â continue;
>> - Â Â Â Â Â Â Â Â Â Â } else if (pass && i > last_boosted_vcpu)
>> + Â Â Â Â Â Â Â Â Â Â } else if (pass && !lastone) {
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â if (vcpu == kvm->last_boosted_vcpu)
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â lastone = 1;
>> + Â Â Â Â Â Â Â Â Â Â } else if (pass && lastone)
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â break;
>> +
>> Â Â Â Â Â Â Â Â Â Â Â if (vcpu == me)
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â continue;
>> Â Â Â Â Â Â Â Â Â Â Â if (waitqueue_active(&vcpu->wq))
>> @@ -1629,15 +1644,20 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â put_task_struct(task);
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â continue;
>> Â Â Â Â Â Â Â Â Â Â Â }
>> +
>> Â Â Â Â Â Â Â Â Â Â Â if (yield_to(task, 1)) {
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â put_task_struct(task);
>> - Â Â Â Â Â Â Â Â Â Â Â Â Â Â kvm->last_boosted_vcpu = i;
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â mutex_lock(&kvm->lock);
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â kvm->last_boosted_vcpu = vcpu;
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â mutex_unlock(&kvm->lock);
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â yielded = 1;
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â break;
>> Â Â Â Â Â Â Â Â Â Â Â }
>> Â Â Â Â Â Â Â Â Â Â Â put_task_struct(task);
>> Â Â Â Â Â Â Â }
>> + Â Â Â Â Â Â srcu_read_unlock(&kvm->srcu_vcpus, idx);
>> Â Â Â }
>> +
>> Â}
>> ÂEXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
>>
>> @@ -1673,11 +1693,30 @@ static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
>> Â Â Â return 0;
>> Â}
>>
>> +static void kvm_vcpu_zap(struct kvm_vcpu *vcpu)
>> +{
>> + Â Â kvm_arch_vcpu_zap(vcpu);
>> +}
>> +
>> Âstatic int kvm_vcpu_release(struct inode *inode, struct file *filp)
>> Â{
>> Â Â Â struct kvm_vcpu *vcpu = filp->private_data;
>> + Â Â struct kvm *kvm = vcpu->kvm;
>> + Â Â filp->private_data = NULL;
>> +
>> + Â Â mutex_lock(&kvm->lock);
>> + Â Â list_del_rcu(&vcpu->list);
>> + Â Â atomic_dec(&kvm->online_vcpus);
>> + Â Â mutex_unlock(&kvm->lock);
>> + Â Â synchronize_srcu_expedited(&kvm->srcu_vcpus);
>> +
>> + Â Â mutex_lock(&kvm->lock);
>> + Â Â if (kvm->last_boosted_vcpu == vcpu)
>> + Â Â Â Â Â Â kvm->last_boosted_vcpu = NULL;
>> + Â Â mutex_unlock(&kvm->lock);
>>
>> - Â Â kvm_put_kvm(vcpu->kvm);
>> + Â Â /*vcpu is out of list,drop it safely*/
>> + Â Â kvm_vcpu_zap(vcpu);
>> Â Â Â return 0;
>> Â}
>>
>> @@ -1699,15 +1738,25 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu)
>> Â Â Â return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR);
>> Â}
>>
>> +static struct kvm_vcpu *kvm_vcpu_create(struct kvm *kvm, u32 id)
>> +{
>> + Â Â struct kvm_vcpu *vcpu;
>> + Â Â vcpu = kvm_arch_vcpu_create(kvm, id);
>> + Â Â if (IS_ERR(vcpu))
>> + Â Â Â Â Â Â return vcpu;
>> + Â Â INIT_LIST_HEAD(&vcpu->list);
>> + Â Â return vcpu;
>> +}
>> +
>> Â/*
>> Â * Creates some virtual cpus. ÂGood luck creating more than one.
>> Â */
>> Âstatic int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
>> Â{
>> - Â Â int r;
>> + Â Â int r, idx;
>> Â Â Â struct kvm_vcpu *vcpu, *v;
>>
>> - Â Â vcpu = kvm_arch_vcpu_create(kvm, id);
>> + Â Â vcpu = kvm_vcpu_create(kvm, id);
>> Â Â Â if (IS_ERR(vcpu))
>> Â Â Â Â Â Â Â return PTR_ERR(vcpu);
>>
>> @@ -1723,13 +1772,15 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
>> Â Â Â Â Â Â Â goto unlock_vcpu_destroy;
>> Â Â Â }
>>
>> - Â Â kvm_for_each_vcpu(r, v, kvm)
>> + Â Â idx = srcu_read_lock(&kvm->srcu_vcpus);
>> + Â Â kvm_for_each_vcpu(v, kvm) {
>> Â Â Â Â Â Â Â if (v->vcpu_id == id) {
>> Â Â Â Â Â Â Â Â Â Â Â r = -EEXIST;
>> + Â Â Â Â Â Â Â Â Â Â srcu_read_unlock(&kvm->srcu_vcpus, idx);
>> Â Â Â Â Â Â Â Â Â Â Â goto unlock_vcpu_destroy;
>> Â Â Â Â Â Â Â }
>> -
>> - Â Â BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]);
>> + Â Â }
>> + Â Â srcu_read_unlock(&kvm->srcu_vcpus, idx);
>>
>> Â Â Â /* Now it's all set up, let userspace reach it */
>> Â Â Â kvm_get_kvm(kvm);
>> @@ -1739,8 +1790,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
>> Â Â Â Â Â Â Â goto unlock_vcpu_destroy;
>> Â Â Â }
>>
>> - Â Â kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu;
>> - Â Â smp_wmb();
>> + Â Â /*Protected by kvm->lock*/
>> + Â Â list_add_rcu(&vcpu->list, &kvm->vcpus);
>> Â Â Â atomic_inc(&kvm->online_vcpus);
>>
>> Â#ifdef CONFIG_KVM_APIC_ARCHITECTURE
>> @@ -2645,13 +2696,16 @@ static int vcpu_stat_get(void *_offset, u64 *val)
>> Â Â Â unsigned offset = (long)_offset;
>> Â Â Â struct kvm *kvm;
>> Â Â Â struct kvm_vcpu *vcpu;
>> - Â Â int i;
>> + Â Â int idx;
>>
>> Â Â Â *val = 0;
>> Â Â Â raw_spin_lock(&kvm_lock);
>> - Â Â list_for_each_entry(kvm, &vm_list, vm_list)
>> - Â Â Â Â Â Â kvm_for_each_vcpu(i, vcpu, kvm)
>> + Â Â list_for_each_entry(kvm, &vm_list, vm_list) {
>> + Â Â Â Â Â Â idx = srcu_read_lock(&kvm->srcu_vcpus);
>> + Â Â Â Â Â Â kvm_for_each_vcpu(vcpu, kvm)
>> Â Â Â Â Â Â Â Â Â Â Â *val += *(u32 *)((void *)vcpu + offset);
>> + Â Â Â Â Â Â srcu_read_unlock(&kvm->srcu_vcpus, idx);
>> + Â Â }
>>
>> Â Â Â raw_spin_unlock(&kvm_lock);
>> Â Â Â return 0;
>> --
>> 1.7.4.4
>
> --
>                         Gleb.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Next message: Adrian Hunter: "Re: [PATCH] intel_mid_dma: remove legacy pm interface"
Previous message: Richard Cochran: "Re: [PATCH 0/4] clock_gettime_ns and x86-64 optimizations"
In reply to: Gleb Natapov: "Re: [PATCH v5] kvm: make vcpu life cycle separated from kvm instance"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]