Re: [PATCHv2] kvm: remove in_range and switch to rwsem for iobus

From: Marcelo Tosatti
Date: Mon Jun 29 2009 - 10:07:29 EST


On Sun, Jun 28, 2009 at 10:34:25PM +0300, Michael S. Tsirkin wrote:
> This changes bus accesses to use high-level kvm_io_bus_read/kvm_io_bus_write
> functions, which utilize read/write semaphore instead of mutex. in_range now
> becomes unused so it is removed from device ops in favor of read/write
> callbacks performing range checks internally.
>
> This allows aliasing (mostly for in-kernel virtio), as well as better error
> handling by making it possible to pass errors up to userspace. And it's enough
> to look at the diffstat to see that it's a better API anyway.

Can you please expand a little on this? I still don't get why making
->in_range part of ->read / ->write is a good thing. Is it for aliasing?

Agree that proliferation of locks is a bad thing.

> While we are at it, document locking rules for kvm_io_device_ops.
>
> Note: since the use of the new bus_lock is localized to a small number of
> places, it will be easy to switch to srcu in the future if we so desire.
>
> Signed-off-by: Michael S. Tsirkin <mst@xxxxxxxxxx>
> ---
>
> OK, here's an updated version of the patch. I punted on rcu and went
> with rw semaphore for now, but it will be easy to change now that the
> use of the semaphore is fairly isolated. Works for me, but please
> review carefully.
>
> arch/ia64/kvm/kvm-ia64.c | 28 +++--------
> arch/x86/kvm/i8254.c | 53 +++++++++++----------
> arch/x86/kvm/i8259.c | 22 +++++----
> arch/x86/kvm/lapic.c | 44 ++++++++----------
> arch/x86/kvm/x86.c | 112 ++++++++++++++-------------------------------
> include/linux/kvm_host.h | 10 +++-
> virt/kvm/coalesced_mmio.c | 28 ++++++------
> virt/kvm/ioapic.c | 24 +++++----
> virt/kvm/iodev.h | 42 +++++++----------
> virt/kvm/kvm_main.c | 41 ++++++++++++-----
> 10 files changed, 184 insertions(+), 220 deletions(-)
>
> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
> index c1c5cb6..5bf5100 100644
> --- a/arch/ia64/kvm/kvm-ia64.c
> +++ b/arch/ia64/kvm/kvm-ia64.c
> @@ -210,16 +210,6 @@ int kvm_dev_ioctl_check_extension(long ext)
>
> }
>
> -static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
> - gpa_t addr, int len, int is_write)
> -{
> - struct kvm_io_device *dev;
> -
> - dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len, is_write);
> -
> - return dev;
> -}
> -
> static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> {
> kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
> @@ -231,6 +221,7 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> {
> struct kvm_mmio_req *p;
> struct kvm_io_device *mmio_dev;
> + int r;
>
> p = kvm_get_vcpu_ioreq(vcpu);
>
> @@ -247,16 +238,13 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> kvm_run->exit_reason = KVM_EXIT_MMIO;
> return 0;
> mmio:
> - mmio_dev = vcpu_find_mmio_dev(vcpu, p->addr, p->size, !p->dir);
> - if (mmio_dev) {
> - if (!p->dir)
> - kvm_iodevice_write(mmio_dev, p->addr, p->size,
> - &p->data);
> - else
> - kvm_iodevice_read(mmio_dev, p->addr, p->size,
> - &p->data);
> -
> - } else
> + if (p->dir)
> + r = kvm_io_bus_read(vcpu->kvm, &vcpu->kvm->mmio_bus, p->addr,
> + p->size, &p->data);
> + else
> + r = kvm_io_bus_write(vcpu->kvm, &vcpu->kvm->mmio_bus, p->addr,
> + p->size, &p->data);
> + if (r)
> printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);
> p->state = STATE_IORESP_READY;
>
> diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
> index 331705f..3155ffa 100644
> --- a/arch/x86/kvm/i8254.c
> +++ b/arch/x86/kvm/i8254.c
> @@ -357,8 +357,14 @@ static inline struct kvm_pit *speaker_to_pit(struct kvm_io_device *dev)
> return container_of(dev, struct kvm_pit, speaker_dev);
> }
>
> -static void pit_ioport_write(struct kvm_io_device *this,
> - gpa_t addr, int len, const void *data)
> +static inline int pit_in_range(gpa_t addr)
> +{
> + return ((addr >= KVM_PIT_BASE_ADDRESS) &&
> + (addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
> +}
> +
> +static int pit_ioport_write(struct kvm_io_device *this,
> + gpa_t addr, int len, const void *data)
> {
> struct kvm_pit *pit = dev_to_pit(this);
> struct kvm_kpit_state *pit_state = &pit->pit_state;
> @@ -366,6 +372,8 @@ static void pit_ioport_write(struct kvm_io_device *this,
> int channel, access;
> struct kvm_kpit_channel_state *s;
> u32 val = *(u32 *) data;
> + if (!pit_in_range(addr))
> + return -EOPNOTSUPP;
>
> val &= 0xff;
> addr &= KVM_PIT_CHANNEL_MASK;
> @@ -428,16 +436,19 @@ static void pit_ioport_write(struct kvm_io_device *this,
> }
>
> mutex_unlock(&pit_state->lock);
> + return 0;
> }
>
> -static void pit_ioport_read(struct kvm_io_device *this,
> - gpa_t addr, int len, void *data)
> +static int pit_ioport_read(struct kvm_io_device *this,
> + gpa_t addr, int len, void *data)
> {
> struct kvm_pit *pit = dev_to_pit(this);
> struct kvm_kpit_state *pit_state = &pit->pit_state;
> struct kvm *kvm = pit->kvm;
> int ret, count;
> struct kvm_kpit_channel_state *s;
> + if (!pit_in_range(addr))
> + return -EOPNOTSUPP;
>
> addr &= KVM_PIT_CHANNEL_MASK;
> s = &pit_state->channels[addr];
> @@ -492,37 +503,36 @@ static void pit_ioport_read(struct kvm_io_device *this,
> memcpy(data, (char *)&ret, len);
>
> mutex_unlock(&pit_state->lock);
> + return 0;
> }
>
> -static int pit_in_range(struct kvm_io_device *this, gpa_t addr,
> - int len, int is_write)
> -{
> - return ((addr >= KVM_PIT_BASE_ADDRESS) &&
> - (addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
> -}
> -
> -static void speaker_ioport_write(struct kvm_io_device *this,
> - gpa_t addr, int len, const void *data)
> +static int speaker_ioport_write(struct kvm_io_device *this,
> + gpa_t addr, int len, const void *data)
> {
> struct kvm_pit *pit = speaker_to_pit(this);
> struct kvm_kpit_state *pit_state = &pit->pit_state;
> struct kvm *kvm = pit->kvm;
> u32 val = *(u32 *) data;
> + if (addr != KVM_SPEAKER_BASE_ADDRESS)
> + return -EOPNOTSUPP;
>
> mutex_lock(&pit_state->lock);
> pit_state->speaker_data_on = (val >> 1) & 1;
> pit_set_gate(kvm, 2, val & 1);
> mutex_unlock(&pit_state->lock);
> + return 0;
> }
>
> -static void speaker_ioport_read(struct kvm_io_device *this,
> - gpa_t addr, int len, void *data)
> +static int speaker_ioport_read(struct kvm_io_device *this,
> + gpa_t addr, int len, void *data)
> {
> struct kvm_pit *pit = speaker_to_pit(this);
> struct kvm_kpit_state *pit_state = &pit->pit_state;
> struct kvm *kvm = pit->kvm;
> unsigned int refresh_clock;
> int ret;
> + if (addr != KVM_SPEAKER_BASE_ADDRESS)
> + return -EOPNOTSUPP;
>
> /* Refresh clock toggles at about 15us. We approximate as 2^14ns. */
> refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1;
> @@ -534,12 +544,7 @@ static void speaker_ioport_read(struct kvm_io_device *this,
> len = sizeof(ret);
> memcpy(data, (char *)&ret, len);
> mutex_unlock(&pit_state->lock);
> -}
> -
> -static int speaker_in_range(struct kvm_io_device *this, gpa_t addr,
> - int len, int is_write)
> -{
> - return (addr == KVM_SPEAKER_BASE_ADDRESS);
> + return 0;
> }
>
> void kvm_pit_reset(struct kvm_pit *pit)
> @@ -573,13 +578,11 @@ static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
> static const struct kvm_io_device_ops pit_dev_ops = {
> .read = pit_ioport_read,
> .write = pit_ioport_write,
> - .in_range = pit_in_range,
> };
>
> static const struct kvm_io_device_ops speaker_dev_ops = {
> .read = speaker_ioport_read,
> .write = speaker_ioport_write,
> - .in_range = speaker_in_range,
> };
>
> struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
> @@ -620,11 +623,11 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
> kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
>
> kvm_iodevice_init(&pit->dev, &pit_dev_ops);
> - kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
> + kvm_io_bus_register_dev(kvm, &kvm->pio_bus, &pit->dev);
>
> if (flags & KVM_PIT_SPEAKER_DUMMY) {
> kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops);
> - kvm_io_bus_register_dev(&kvm->pio_bus, &pit->speaker_dev);
> + kvm_io_bus_register_dev(kvm, &kvm->pio_bus, &pit->speaker_dev);
> }
>
> return pit;
> diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
> index 148c52a..1d1bb75 100644
> --- a/arch/x86/kvm/i8259.c
> +++ b/arch/x86/kvm/i8259.c
> @@ -430,8 +430,7 @@ static u32 elcr_ioport_read(void *opaque, u32 addr1)
> return s->elcr;
> }
>
> -static int picdev_in_range(struct kvm_io_device *this, gpa_t addr,
> - int len, int is_write)
> +static int picdev_in_range(gpa_t addr)
> {
> switch (addr) {
> case 0x20:
> @@ -451,16 +450,18 @@ static inline struct kvm_pic *to_pic(struct kvm_io_device *dev)
> return container_of(dev, struct kvm_pic, dev);
> }
>
> -static void picdev_write(struct kvm_io_device *this,
> +static int picdev_write(struct kvm_io_device *this,
> gpa_t addr, int len, const void *val)
> {
> struct kvm_pic *s = to_pic(this);
> unsigned char data = *(unsigned char *)val;
> + if (!picdev_in_range(addr))
> + return -EOPNOTSUPP;
>
> if (len != 1) {
> if (printk_ratelimit())
> printk(KERN_ERR "PIC: non byte write\n");
> - return;
> + return 0;
> }
> pic_lock(s);
> switch (addr) {
> @@ -476,18 +477,21 @@ static void picdev_write(struct kvm_io_device *this,
> break;
> }
> pic_unlock(s);
> + return 0;
> }
>
> -static void picdev_read(struct kvm_io_device *this,
> - gpa_t addr, int len, void *val)
> +static int picdev_read(struct kvm_io_device *this,
> + gpa_t addr, int len, void *val)
> {
> struct kvm_pic *s = to_pic(this);
> unsigned char data = 0;
> + if (!picdev_in_range(addr))
> + return -EOPNOTSUPP;
>
> if (len != 1) {
> if (printk_ratelimit())
> printk(KERN_ERR "PIC: non byte read\n");
> - return;
> + return 0;
> }
> pic_lock(s);
> switch (addr) {
> @@ -504,6 +508,7 @@ static void picdev_read(struct kvm_io_device *this,
> }
> *(unsigned char *)val = data;
> pic_unlock(s);
> + return 0;
> }
>
> /*
> @@ -526,7 +531,6 @@ static void pic_irq_request(void *opaque, int level)
> static const struct kvm_io_device_ops picdev_ops = {
> .read = picdev_read,
> .write = picdev_write,
> - .in_range = picdev_in_range,
> };
>
> struct kvm_pic *kvm_create_pic(struct kvm *kvm)
> @@ -548,6 +552,6 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
> * Initialize PIO device
> */
> kvm_iodevice_init(&s->dev, &picdev_ops);
> - kvm_io_bus_register_dev(&kvm->pio_bus, &s->dev);
> + kvm_io_bus_register_dev(kvm, &kvm->pio_bus, &s->dev);
> return s;
> }
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 2e02865..265a765 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -546,18 +546,27 @@ static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
> return container_of(dev, struct kvm_lapic, dev);
> }
>
> -static void apic_mmio_read(struct kvm_io_device *this,
> - gpa_t address, int len, void *data)
> +static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
> +{
> + return apic_hw_enabled(apic) &&
> + addr >= apic->base_address &&
> + addr < apic->base_address + LAPIC_MMIO_LENGTH;
> +}
> +
> +static int apic_mmio_read(struct kvm_io_device *this,
> + gpa_t address, int len, void *data)
> {
> struct kvm_lapic *apic = to_lapic(this);
> unsigned int offset = address - apic->base_address;
> unsigned char alignment = offset & 0xf;
> u32 result;
> + if (!apic_mmio_in_range(apic, address))
> + return -EOPNOTSUPP;
>
> if ((alignment + len) > 4) {
> printk(KERN_ERR "KVM_APIC_READ: alignment error %lx %d",
> (unsigned long)address, len);
> - return;
> + return 0;
> }
> result = __apic_read(apic, offset & ~0xf);
>
> @@ -574,6 +583,7 @@ static void apic_mmio_read(struct kvm_io_device *this,
> "should be 1,2, or 4 instead\n", len);
> break;
> }
> + return 0;
> }
>
> static void update_divide_count(struct kvm_lapic *apic)
> @@ -629,13 +639,15 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
> apic->vcpu->kvm->arch.vapics_in_nmi_mode--;
> }
>
> -static void apic_mmio_write(struct kvm_io_device *this,
> - gpa_t address, int len, const void *data)
> +static int apic_mmio_write(struct kvm_io_device *this,
> + gpa_t address, int len, const void *data)
> {
> struct kvm_lapic *apic = to_lapic(this);
> unsigned int offset = address - apic->base_address;
> unsigned char alignment = offset & 0xf;
> u32 val;
> + if (!apic_mmio_in_range(apic, address))
> + return -EOPNOTSUPP;
>
> /*
> * APIC register must be aligned on 128-bits boundary.
> @@ -646,7 +658,7 @@ static void apic_mmio_write(struct kvm_io_device *this,
> /* Don't shout loud, $infamous_os would cause only noise. */
> apic_debug("apic write: bad size=%d %lx\n",
> len, (long)address);
> - return;
> + return 0;
> }
>
> val = *(u32 *) data;
> @@ -729,7 +741,7 @@ static void apic_mmio_write(struct kvm_io_device *this,
> hrtimer_cancel(&apic->lapic_timer.timer);
> apic_set_reg(apic, APIC_TMICT, val);
> start_apic_timer(apic);
> - return;
> + return 0;
>
> case APIC_TDCR:
> if (val & 4)
> @@ -743,22 +755,7 @@ static void apic_mmio_write(struct kvm_io_device *this,
> offset);
> break;
> }
> -
> -}
> -
> -static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr,
> - int len, int size)
> -{
> - struct kvm_lapic *apic = to_lapic(this);
> - int ret = 0;
> -
> -
> - if (apic_hw_enabled(apic) &&
> - (addr >= apic->base_address) &&
> - (addr < (apic->base_address + LAPIC_MMIO_LENGTH)))
> - ret = 1;
> -
> - return ret;
> + return 0;
> }
>
> void kvm_free_lapic(struct kvm_vcpu *vcpu)
> @@ -938,7 +935,6 @@ static struct kvm_timer_ops lapic_timer_ops = {
> static const struct kvm_io_device_ops apic_mmio_ops = {
> .read = apic_mmio_read,
> .write = apic_mmio_write,
> - .in_range = apic_mmio_range,
> };
>
> int kvm_create_lapic(struct kvm_vcpu *vcpu)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 5a66bb9..badc23f 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2260,35 +2260,23 @@ static void kvm_init_msr_list(void)
> num_msrs_to_save = j;
> }
>
> -/*
> - * Only apic need an MMIO device hook, so shortcut now..
> - */
> -static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu,
> - gpa_t addr, int len,
> - int is_write)
> +static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
> + const void *v)
> {
> - struct kvm_io_device *dev;
> + if (vcpu->arch.apic &&
> + !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v))
> + return 0;
>
> - if (vcpu->arch.apic) {
> - dev = &vcpu->arch.apic->dev;
> - if (kvm_iodevice_in_range(dev, addr, len, is_write))
> - return dev;
> - }
> - return NULL;
> + return kvm_io_bus_write(vcpu->kvm, &vcpu->kvm->mmio_bus, addr, len, v);
> }
>
> -
> -static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
> - gpa_t addr, int len,
> - int is_write)
> +static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
> {
> - struct kvm_io_device *dev;
> + if (vcpu->arch.apic &&
> + !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v))
> + return 0;
>
> - dev = vcpu_find_pervcpu_dev(vcpu, addr, len, is_write);
> - if (dev == NULL)
> - dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len,
> - is_write);
> - return dev;
> + return kvm_io_bus_read(vcpu->kvm, &vcpu->kvm->mmio_bus, addr, len, v);
> }
>
> static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
> @@ -2357,7 +2345,6 @@ static int emulator_read_emulated(unsigned long addr,
> unsigned int bytes,
> struct kvm_vcpu *vcpu)
> {
> - struct kvm_io_device *mmio_dev;
> gpa_t gpa;
>
> if (vcpu->mmio_read_completed) {
> @@ -2382,13 +2369,8 @@ mmio:
> /*
> * Is this MMIO handled locally?
> */
> - mutex_lock(&vcpu->kvm->lock);
> - mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0);
> - mutex_unlock(&vcpu->kvm->lock);
> - if (mmio_dev) {
> - kvm_iodevice_read(mmio_dev, gpa, bytes, val);
> + if (!vcpu_mmio_read(vcpu, gpa, bytes, val))
> return X86EMUL_CONTINUE;
> - }
>
> vcpu->mmio_needed = 1;
> vcpu->mmio_phys_addr = gpa;
> @@ -2415,7 +2397,6 @@ static int emulator_write_emulated_onepage(unsigned long addr,
> unsigned int bytes,
> struct kvm_vcpu *vcpu)
> {
> - struct kvm_io_device *mmio_dev;
> gpa_t gpa;
>
> gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
> @@ -2436,13 +2417,8 @@ mmio:
> /*
> * Is this MMIO handled locally?
> */
> - mutex_lock(&vcpu->kvm->lock);
> - mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 1);
> - mutex_unlock(&vcpu->kvm->lock);
> - if (mmio_dev) {
> - kvm_iodevice_write(mmio_dev, gpa, bytes, val);
> + if (!vcpu_mmio_write(vcpu, gpa, bytes, val))
> return X86EMUL_CONTINUE;
> - }
>
> vcpu->mmio_needed = 1;
> vcpu->mmio_phys_addr = gpa;
> @@ -2758,48 +2734,42 @@ int complete_pio(struct kvm_vcpu *vcpu)
> return 0;
> }
>
> -static void kernel_pio(struct kvm_io_device *pio_dev,
> - struct kvm_vcpu *vcpu,
> - void *pd)
> +static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
> {
> /* TODO: String I/O for in kernel device */
> + int r;
>
> if (vcpu->arch.pio.in)
> - kvm_iodevice_read(pio_dev, vcpu->arch.pio.port,
> - vcpu->arch.pio.size,
> - pd);
> + r = kvm_io_bus_read(vcpu->kvm, &vcpu->kvm->pio_bus,
> + vcpu->arch.pio.port,
> + vcpu->arch.pio.size, pd);
> else
> - kvm_iodevice_write(pio_dev, vcpu->arch.pio.port,
> - vcpu->arch.pio.size,
> - pd);
> + r = kvm_io_bus_write(vcpu->kvm, &vcpu->kvm->pio_bus,
> + vcpu->arch.pio.port,
> + vcpu->arch.pio.size, pd);
> + return r;
> }
>
> -static void pio_string_write(struct kvm_io_device *pio_dev,
> - struct kvm_vcpu *vcpu)
> +static int pio_string_write(struct kvm_vcpu *vcpu)
> {
> struct kvm_pio_request *io = &vcpu->arch.pio;
> void *pd = vcpu->arch.pio_data;
> - int i;
> + int i, r = 0;
>
> for (i = 0; i < io->cur_count; i++) {
> - kvm_iodevice_write(pio_dev, io->port,
> - io->size,
> - pd);
> + if (kvm_io_bus_write(vcpu->kvm, &vcpu->kvm->pio_bus,
> + io->port, io->size, pd)) {
> + r = -EOPNOTSUPP;
> + break;
> + }
> pd += io->size;
> }
> -}
> -
> -static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
> - gpa_t addr, int len,
> - int is_write)
> -{
> - return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr, len, is_write);
> + return r;
> }
>
> int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
> int size, unsigned port)
> {
> - struct kvm_io_device *pio_dev;
> unsigned long val;
>
> vcpu->run->exit_reason = KVM_EXIT_IO;
> @@ -2819,11 +2789,7 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
> val = kvm_register_read(vcpu, VCPU_REGS_RAX);
> memcpy(vcpu->arch.pio_data, &val, 4);
>
> - mutex_lock(&vcpu->kvm->lock);
> - pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
> - mutex_unlock(&vcpu->kvm->lock);
> - if (pio_dev) {
> - kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
> + if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
> complete_pio(vcpu);
> return 1;
> }
> @@ -2837,7 +2803,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
> {
> unsigned now, in_page;
> int ret = 0;
> - struct kvm_io_device *pio_dev;
>
> vcpu->run->exit_reason = KVM_EXIT_IO;
> vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
> @@ -2881,12 +2846,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
>
> vcpu->arch.pio.guest_gva = address;
>
> - mutex_lock(&vcpu->kvm->lock);
> - pio_dev = vcpu_find_pio_dev(vcpu, port,
> - vcpu->arch.pio.cur_count,
> - !vcpu->arch.pio.in);
> - mutex_unlock(&vcpu->kvm->lock);
> -
> if (!vcpu->arch.pio.in) {
> /* string PIO write */
> ret = pio_copy_data(vcpu);
> @@ -2894,16 +2853,13 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
> kvm_inject_gp(vcpu, 0);
> return 1;
> }
> - if (ret == 0 && pio_dev) {
> - pio_string_write(pio_dev, vcpu);
> + if (ret == 0 && !pio_string_write(vcpu)) {
> complete_pio(vcpu);
> if (vcpu->arch.pio.count == 0)
> ret = 1;
> }
> - } else if (pio_dev)
> - pr_unimpl(vcpu, "no string pio read support yet, "
> - "port %x size %d count %ld\n",
> - port, size, count);
> + }
> + /* no string PIO read support yet */
>
> return ret;
> }
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 2451f48..8337f8e 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -42,6 +42,7 @@
>
> #define KVM_USERSPACE_IRQ_SOURCE_ID 0
>
> +struct kvm;
> struct kvm_vcpu;
> extern struct kmem_cache *kvm_vcpu_cache;
>
> @@ -59,9 +60,11 @@ struct kvm_io_bus {
>
> void kvm_io_bus_init(struct kvm_io_bus *bus);
> void kvm_io_bus_destroy(struct kvm_io_bus *bus);
> -struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus,
> - gpa_t addr, int len, int is_write);
> -void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
> +int kvm_io_bus_write(struct kvm *, struct kvm_io_bus *bus, gpa_t addr, int len,
> + const void *val);
> +int kvm_io_bus_read(struct kvm *, struct kvm_io_bus *bus, gpa_t addr, int len,
> + void *val);
> +void kvm_io_bus_register_dev(struct kvm *, struct kvm_io_bus *bus,
> struct kvm_io_device *dev);
>
> struct kvm_vcpu {
> @@ -138,6 +141,7 @@ struct kvm {
> atomic_t online_vcpus;
> struct list_head vm_list;
> struct mutex lock;
> + struct rw_semaphore bus_lock;
> struct kvm_io_bus mmio_bus;
> struct kvm_io_bus pio_bus;
> #ifdef CONFIG_HAVE_KVM_EVENTFD
> diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
> index 397f419..0140218 100644
> --- a/virt/kvm/coalesced_mmio.c
> +++ b/virt/kvm/coalesced_mmio.c
> @@ -19,17 +19,15 @@ static inline struct kvm_coalesced_mmio_dev *to_mmio(struct kvm_io_device *dev)
> return container_of(dev, struct kvm_coalesced_mmio_dev, dev);
> }
>
> -static int coalesced_mmio_in_range(struct kvm_io_device *this,
> - gpa_t addr, int len, int is_write)
> +static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev,
> + gpa_t addr, int len)
> {
> - struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
> struct kvm_coalesced_mmio_zone *zone;
> struct kvm_coalesced_mmio_ring *ring;
> unsigned avail;
> int i;
>
> - if (!is_write)
> - return 0;
> + /* kvm->bus_lock is taken by the caller */
>
> /* Are we able to batch it ? */
>
> @@ -60,11 +58,13 @@ static int coalesced_mmio_in_range(struct kvm_io_device *this,
> return 0;
> }
>
> -static void coalesced_mmio_write(struct kvm_io_device *this,
> - gpa_t addr, int len, const void *val)
> +static int coalesced_mmio_write(struct kvm_io_device *this,
> + gpa_t addr, int len, const void *val)
> {
> struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
> struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
> + if (!coalesced_mmio_in_range(dev, addr, len))
> + return -EOPNOTSUPP;
>
> spin_lock(&dev->lock);
>
> @@ -76,6 +76,7 @@ static void coalesced_mmio_write(struct kvm_io_device *this,
> smp_wmb();
> ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX;
> spin_unlock(&dev->lock);
> + return 0;
> }
>
> static void coalesced_mmio_destructor(struct kvm_io_device *this)
> @@ -87,7 +88,6 @@ static void coalesced_mmio_destructor(struct kvm_io_device *this)
>
> static const struct kvm_io_device_ops coalesced_mmio_ops = {
> .write = coalesced_mmio_write,
> - .in_range = coalesced_mmio_in_range,
> .destructor = coalesced_mmio_destructor,
> };
>
> @@ -102,7 +102,7 @@ int kvm_coalesced_mmio_init(struct kvm *kvm)
> kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops);
> dev->kvm = kvm;
> kvm->coalesced_mmio_dev = dev;
> - kvm_io_bus_register_dev(&kvm->mmio_bus, &dev->dev);
> + kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &dev->dev);
>
> return 0;
> }
> @@ -115,16 +115,16 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
> if (dev == NULL)
> return -EINVAL;
>
> - mutex_lock(&kvm->lock);
> + down_write(&kvm->bus_lock);
> if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) {
> - mutex_unlock(&kvm->lock);
> + up_write(&kvm->bus_lock);
> return -ENOBUFS;
> }
>
> dev->zone[dev->nb_zones] = *zone;
> dev->nb_zones++;
>
> - mutex_unlock(&kvm->lock);
> + up_write(&kvm->bus_lock);
> return 0;
> }
>
> @@ -138,7 +138,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
> if (dev == NULL)
> return -EINVAL;
>
> - mutex_lock(&kvm->lock);
> + down_write(&kvm->bus_lock);
>
> i = dev->nb_zones;
> while(i) {
> @@ -156,7 +156,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
> i--;
> }
>
> - mutex_unlock(&kvm->lock);
> + up_write(&kvm->bus_lock);
>
> return 0;
> }
> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
> index d8b2eca..2d352f7 100644
> --- a/virt/kvm/ioapic.c
> +++ b/virt/kvm/ioapic.c
> @@ -227,20 +227,19 @@ static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev)
> return container_of(dev, struct kvm_ioapic, dev);
> }
>
> -static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr,
> - int len, int is_write)
> +static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr)
> {
> - struct kvm_ioapic *ioapic = to_ioapic(this);
> -
> return ((addr >= ioapic->base_address &&
> (addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
> }
>
> -static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
> - void *val)
> +static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
> + void *val)
> {
> struct kvm_ioapic *ioapic = to_ioapic(this);
> u32 result;
> + if (!ioapic_in_range(ioapic, addr))
> + return -ENOTSUPP;
>
> ioapic_debug("addr %lx\n", (unsigned long)addr);
> ASSERT(!(addr & 0xf)); /* check alignment */
> @@ -273,13 +272,16 @@ static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
> printk(KERN_WARNING "ioapic: wrong length %d\n", len);
> }
> mutex_unlock(&ioapic->kvm->irq_lock);
> + return 0;
> }
>
> -static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
> - const void *val)
> +static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
> + const void *val)
> {
> struct kvm_ioapic *ioapic = to_ioapic(this);
> u32 data;
> + if (!ioapic_in_range(ioapic, addr))
> + return -ENOTSUPP;
>
> ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n",
> (void*)addr, len, val);
> @@ -290,7 +292,7 @@ static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
> data = *(u32 *) val;
> else {
> printk(KERN_WARNING "ioapic: Unsupported size %d\n", len);
> - return;
> + return 0;
> }
>
> addr &= 0xff;
> @@ -312,6 +314,7 @@ static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
> break;
> }
> mutex_unlock(&ioapic->kvm->irq_lock);
> + return 0;
> }
>
> void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
> @@ -329,7 +332,6 @@ void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
> static const struct kvm_io_device_ops ioapic_mmio_ops = {
> .read = ioapic_mmio_read,
> .write = ioapic_mmio_write,
> - .in_range = ioapic_in_range,
> };
>
> int kvm_ioapic_init(struct kvm *kvm)
> @@ -343,7 +345,7 @@ int kvm_ioapic_init(struct kvm *kvm)
> kvm_ioapic_reset(ioapic);
> kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
> ioapic->kvm = kvm;
> - kvm_io_bus_register_dev(&kvm->mmio_bus, &ioapic->dev);
> + kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &ioapic->dev);
> return 0;
> }
>
> diff --git a/virt/kvm/iodev.h b/virt/kvm/iodev.h
> index 2c67f5a..c4b07eb 100644
> --- a/virt/kvm/iodev.h
> +++ b/virt/kvm/iodev.h
> @@ -17,20 +17,24 @@
> #define __KVM_IODEV_H__
>
> #include <linux/kvm_types.h>
> +#include <asm/errno.h>
>
> struct kvm_io_device;
>
> +/**
> + * read and write handlers are called under kvm bus_lock.
> + * They return 0 if the transaction has been handled,
> + * or non-zero to have it passed to the next device.
> + **/
> struct kvm_io_device_ops {
> - void (*read)(struct kvm_io_device *this,
> + int (*read)(struct kvm_io_device *this,
> + gpa_t addr,
> + int len,
> + void *val);
> + int (*write)(struct kvm_io_device *this,
> gpa_t addr,
> int len,
> - void *val);
> - void (*write)(struct kvm_io_device *this,
> - gpa_t addr,
> - int len,
> - const void *val);
> - int (*in_range)(struct kvm_io_device *this, gpa_t addr, int len,
> - int is_write);
> + const void *val);
> void (*destructor)(struct kvm_io_device *this);
> };
>
> @@ -45,26 +49,16 @@ static inline void kvm_iodevice_init(struct kvm_io_device *dev,
> dev->ops = ops;
> }
>
> -static inline void kvm_iodevice_read(struct kvm_io_device *dev,
> - gpa_t addr,
> - int len,
> - void *val)
> +static inline int kvm_iodevice_read(struct kvm_io_device *dev,
> + gpa_t addr, int l, void *v)
> {
> - dev->ops->read(dev, addr, len, val);
> + return dev->ops->read ? dev->ops->read(dev, addr, l, v) : -EOPNOTSUPP;
> }
>
> -static inline void kvm_iodevice_write(struct kvm_io_device *dev,
> - gpa_t addr,
> - int len,
> - const void *val)
> +static inline int kvm_iodevice_write(struct kvm_io_device *dev,
> + gpa_t addr, int l, const void *v)
> {
> - dev->ops->write(dev, addr, len, val);
> -}
> -
> -static inline int kvm_iodevice_in_range(struct kvm_io_device *dev,
> - gpa_t addr, int len, int is_write)
> -{
> - return dev->ops->in_range(dev, addr, len, is_write);
> + return dev->ops->write ? dev->ops->write(dev, addr, l, v) : -EOPNOTSUPP;
> }
>
> static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 58d6bc6..3b19839 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -981,6 +981,7 @@ static struct kvm *kvm_create_vm(void)
> kvm_io_bus_init(&kvm->pio_bus);
> kvm_irqfd_init(kvm);
> mutex_init(&kvm->lock);
> + init_rwsem(&kvm->bus_lock);
> mutex_init(&kvm->irq_lock);
> kvm_io_bus_init(&kvm->mmio_bus);
> init_rwsem(&kvm->slots_lock);
> @@ -2484,26 +2485,42 @@ void kvm_io_bus_destroy(struct kvm_io_bus *bus)
> }
> }
>
> -struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus,
> - gpa_t addr, int len, int is_write)
> +int kvm_io_bus_write(struct kvm *kvm, struct kvm_io_bus *bus, gpa_t addr,
> + int len, const void *val)
> {
> - int i;
> -
> - for (i = 0; i < bus->dev_count; i++) {
> - struct kvm_io_device *pos = bus->devs[i];
> -
> - if (kvm_iodevice_in_range(pos, addr, len, is_write))
> - return pos;
> - }
> + int i, r = -EOPNOTSUPP;
> + down_read(&kvm->bus_lock);
> + for (i = 0; i < bus->dev_count; i++)
> + if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) {
> + r = 0;
> + break;
> + }
> + up_read(&kvm->bus_lock);
> + return r;
> +}
>
> - return NULL;
> +int kvm_io_bus_read(struct kvm *kvm, struct kvm_io_bus *bus, gpa_t addr,
> + int len, void *val)
> +{
> + int i, r = -EOPNOTSUPP;
> + down_read(&kvm->bus_lock);
> + for (i = 0; i < bus->dev_count; i++)
> + if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) {
> + r = 0;
> + break;
> + }
> + up_read(&kvm->bus_lock);
> + return r;
> }
>
> -void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev)
> +void kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus,
> + struct kvm_io_device *dev)
> {
> + down_write(&kvm->bus_lock);
> BUG_ON(bus->dev_count > (NR_IOBUS_DEVS-1));
>
> bus->devs[bus->dev_count++] = dev;
> + up_write(&kvm->bus_lock);
> }
>
> static struct notifier_block kvm_cpu_notifier = {
> --
> 1.6.2.2
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/