Re: [PATCH v3 2/4] firmware: use acpi to detect QEMU fw_cfg device for sysfs fw_cfg driver

From: Gabriel L. Somlo
Date: Sun Oct 04 2015 - 16:25:00 EST


On Sun, Oct 04, 2015 at 10:54:57AM +0300, Michael S. Tsirkin wrote:
> On Sat, Oct 03, 2015 at 07:28:07PM -0400, Gabriel L. Somlo wrote:
> > From: Gabriel Somlo <somlo@xxxxxxx>
> >
> > Instead of blindly probing fw_cfg registers at known IOport and MMIO
> > locations, use the ACPI subsystem to determine whether a QEMU fw_cfg
> > device is present, and, if found, to initialize it.
> >
> > This limits portability to architectures which support ACPI (x86 and
> > UEFI-enabled aarch64), but avoids touching hardware registers before
> > being certain that our device is present.
> >
> > NOTE: The standard way to verify the presence of fw_cfg on arm VMs
> > would have been to use the device tree, but that would have left out
> > x86, which is the primary architecture targeted by this patch.
> >
> > Signed-off-by: Gabriel Somlo <somlo@xxxxxxx>
>
> IMHO it's not a good idea to probe registers provided
> by CRS like this.
> It seems quite reasonable that we'd want to add some
> extra registers in the future, and this probing will break.
>
> Further, accessing registers directly means that there's
> no way to have ACPI code access them as that would
> cause race conditions.
>
> Maybe we should provide access methods in ACPI instead?

OK, I think I understand what you meant by "don't poke CRS" in the
other thread...

So, you're proposing I move the follwing bits:

/* atomic access to fw_cfg device (potentially slow i/o, so using
* mutex) */
static DEFINE_MUTEX(fw_cfg_dev_lock);

/* pick appropriate endianness for selector key */
static inline u16 fw_cfg_sel_endianness(u16 key)
{
return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
}

/* type for fw_cfg "directory scan" visitor/callback function */
typedef int (*fw_cfg_file_callback)(const struct fw_cfg_file *f);

/* run a given callback on each fw_cfg directory entry */
static int fw_cfg_scan_dir(fw_cfg_file_callback callback)
{
int ret = 0;
u32 count, i;
struct fw_cfg_file f;

mutex_lock(&fw_cfg_dev_lock);
iowrite16(fw_cfg_sel_endianness(FW_CFG_FILE_DIR), fw_cfg_reg_ctrl);
ioread8_rep(fw_cfg_reg_data, &count, sizeof(count));
for (i = 0; i < be32_to_cpu(count); i++) {
ioread8_rep(fw_cfg_reg_data, &f, sizeof(f));
ret = callback(&f);
if (ret)
break;
}
mutex_unlock(&fw_cfg_dev_lock);
return ret;
}

/* read chunk of given fw_cfg blob (caller responsible for
* sanity-check) */
static inline void fw_cfg_read_blob(u16 key,
void *buf, loff_t pos, size_t count)
{
mutex_lock(&fw_cfg_dev_lock);
iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
while (pos-- > 0)
ioread8(fw_cfg_reg_data);
ioread8_rep(fw_cfg_reg_data, buf, count);
mutex_unlock(&fw_cfg_dev_lock);
}

into the FWCF, "QEMU0002" node as an AML method ? Have ACPI provide
mutual exclusion against competing readers, and somehow figure out how
to call the ACPI/AML code from the guest-side kernel driver whenever
I need to call fw_cfg_read_blob() ?

I guess I could implement fw_cfg_scan_dir() using fw_cfg_read_blob():

u32 count;
size_t bufsize;
void *buf;
fw_cfg_read_blob(FW_CFG_FILE_DIR, &count, 0, sizeof(u32));
bufsize = sizeof(u32) + count * sizeof(struct fw_cfg_file);
buf = kalloc(bufsize);
fw_cfg_read_blob(FW_CFG_FILE_DIR, buf, 0, bufsize);
...
/* now read all the blob meta-data from buf ... */

It would be 100% atomic, but since we can safely assume the fw_cfg
contents never change, it'd be OK.

The atomicity of the ACPI version of fw_cfg_read_blob(), picking the
right endianness for the selector, etc. would have to be done in AML
within the QEMU host-side patch.

If you know of anything I can look at for a good ASL example, please
point it out to me. I'm going to go away now and spend some quality
time with the ACPI spec :)

Thanks,
--Gabriel

>
>
> > ---
> > .../ABI/testing/sysfs-firmware-qemu_fw_cfg | 4 +
> > drivers/firmware/Kconfig | 2 +-
> > drivers/firmware/qemu_fw_cfg.c | 201 +++++++++++----------
> > 3 files changed, 113 insertions(+), 94 deletions(-)
> >
> > diff --git a/Documentation/ABI/testing/sysfs-firmware-qemu_fw_cfg b/Documentation/ABI/testing/sysfs-firmware-qemu_fw_cfg
> > index f1ef44e..e9761bf 100644
> > --- a/Documentation/ABI/testing/sysfs-firmware-qemu_fw_cfg
> > +++ b/Documentation/ABI/testing/sysfs-firmware-qemu_fw_cfg
> > @@ -76,6 +76,10 @@ Description:
> > the port number of the control register. I.e., the two ports
> > are overlapping, and can not be mapped separately.
> >
> > + NOTE 2. QEMU publishes the register details in the device tree
> > + on arm guests, and in ACPI (under _HID "QEMU0002") on x86 and
> > + select arm (aarch64) VM types.
> > +
> > === Firmware Configuration Items of Interest ===
> >
> > Originally, the index key, size, and formatting of blobs in
> > diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
> > index 0466e80..bc12d31 100644
> > --- a/drivers/firmware/Kconfig
> > +++ b/drivers/firmware/Kconfig
> > @@ -137,7 +137,7 @@ config ISCSI_IBFT
> >
> > config FW_CFG_SYSFS
> > tristate "QEMU fw_cfg device support in sysfs"
> > - depends on SYSFS
> > + depends on SYSFS && ACPI
> > default n
> > help
> > Say Y or M here to enable the exporting of the QEMU firmware
> > diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
> > index 3a67a16..f935afb 100644
> > --- a/drivers/firmware/qemu_fw_cfg.c
> > +++ b/drivers/firmware/qemu_fw_cfg.c
> > @@ -8,6 +8,7 @@
> > */
> >
> > #include <linux/module.h>
> > +#include <linux/acpi.h>
> > #include <linux/slab.h>
> > #include <linux/io.h>
> > #include <linux/ioport.h>
> > @@ -35,53 +36,10 @@ struct fw_cfg_file {
> > char name[FW_CFG_MAX_FILE_PATH];
> > };
> >
> > -/* fw_cfg device i/o access options type */
> > -struct fw_cfg_access {
> > - const char *name;
> > - phys_addr_t base;
> > - u8 size;
> > - u8 ctrl_offset;
> > - u8 data_offset;
> > - bool is_mmio;
> > -};
> > -
> > -/* table of fw_cfg device i/o access options for known architectures */
> > -static struct fw_cfg_access fw_cfg_modes[] = {
> > - {
> > - .name = "fw_cfg IOport on i386, sun4u",
> > - .base = 0x510,
> > - .size = 0x02,
> > - .ctrl_offset = 0x00,
> > - .data_offset = 0x01,
> > - .is_mmio = false,
> > - }, {
> > - .name = "fw_cfg MMIO on arm",
> > - .base = 0x9020000,
> > - .size = 0x0a,
> > - .ctrl_offset = 0x08,
> > - .data_offset = 0x00,
> > - .is_mmio = true,
> > - }, {
> > - .name = "fw_cfg MMIO on sun4m",
> > - .base = 0xd00000510,
> > - .size = 0x03,
> > - .ctrl_offset = 0x00,
> > - .data_offset = 0x02,
> > - .is_mmio = true,
> > - }, {
> > - .name = "fw_cfg MMIO on ppc/mac",
> > - .base = 0xf0000510,
> > - .size = 0x03,
> > - .ctrl_offset = 0x00,
> > - .data_offset = 0x02,
> > - .is_mmio = true,
> > - }, { } /* END */
> > -};
> > -
> > -/* fw_cfg device i/o currently selected option set */
> > -static struct fw_cfg_access *fw_cfg_mode;
> > -
> > /* fw_cfg device i/o register addresses */
> > +static bool fw_cfg_is_mmio;
> > +static phys_addr_t fw_cfg_phys_base;
> > +static u32 fw_cfg_phys_size;
> > static void __iomem *fw_cfg_dev_base;
> > static void __iomem *fw_cfg_reg_ctrl;
> > static void __iomem *fw_cfg_reg_data;
> > @@ -92,7 +50,7 @@ static DEFINE_MUTEX(fw_cfg_dev_lock);
> > /* pick appropriate endianness for selector key */
> > static inline u16 fw_cfg_sel_endianness(u16 key)
> > {
> > - return fw_cfg_mode->is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
> > + return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
> > }
> >
> > /* type for fw_cfg "directory scan" visitor/callback function */
> > @@ -133,60 +91,100 @@ static inline void fw_cfg_read_blob(u16 key,
> > /* clean up fw_cfg device i/o */
> > static void fw_cfg_io_cleanup(void)
> > {
> > - if (fw_cfg_mode->is_mmio) {
> > + if (fw_cfg_is_mmio) {
> > iounmap(fw_cfg_dev_base);
> > - release_mem_region(fw_cfg_mode->base, fw_cfg_mode->size);
> > + release_mem_region(fw_cfg_phys_base, fw_cfg_phys_size);
> > } else {
> > ioport_unmap(fw_cfg_dev_base);
> > - release_region(fw_cfg_mode->base, fw_cfg_mode->size);
> > + release_region(fw_cfg_phys_base, fw_cfg_phys_size);
> > }
> > }
> >
> > -/* probe and map fw_cfg device */
> > -static int __init fw_cfg_io_probe(void)
> > +/* configure fw_cfg device i/o from ACPI _CRS method */
> > +static acpi_status fw_cfg_walk_crs(struct acpi_resource *r, void *context)
> > +{
> > + struct acpi_resource_io *io;
> > + struct acpi_resource_fixed_memory32 *mmio;
> > +
> > + switch (r->type) {
> > + case ACPI_RESOURCE_TYPE_END_TAG:
> > + return AE_OK;
> > + case ACPI_RESOURCE_TYPE_IO:
> > + io = &r->data.io;
> > + /* physical base addr should NOT be already set */
> > + if (fw_cfg_phys_base)
> > + return AE_ERROR;
> > + if (!request_region(io->minimum,
> > + io->address_length, "fw_cfg_io"))
> > + return AE_ERROR;
> > + fw_cfg_dev_base = ioport_map(io->minimum, io->address_length);
> > + if (!fw_cfg_dev_base) {
> > + release_region(io->minimum, io->address_length);
> > + return AE_ERROR;
> > + }
> > + fw_cfg_phys_base = io->minimum;
> > + fw_cfg_phys_size = io->address_length;
> > + fw_cfg_is_mmio = false;
> > + /* set register addresses (pc/i386 offsets) */
> > + fw_cfg_reg_ctrl = fw_cfg_dev_base + 0x00;
> > + fw_cfg_reg_data = fw_cfg_dev_base + 0x01;
> > + return AE_OK;
> > + case ACPI_RESOURCE_TYPE_FIXED_MEMORY32:
> > + mmio = &r->data.fixed_memory32;
> > + /* physical base addr should NOT be already set */
> > + if (fw_cfg_phys_base)
> > + return AE_ERROR;
> > + /* MMIO and ACPI, but not on ARM ?!?! */
> > + if (mmio->address_length < 0x0a)
> > + return AE_ERROR;
> > + if (!request_mem_region(mmio->address,
> > + mmio->address_length, "fw_cfg_mem"))
> > + return AE_ERROR;
> > + fw_cfg_dev_base = ioremap(mmio->address, mmio->address_length);
> > + if (!fw_cfg_dev_base) {
> > + release_mem_region(mmio->address, mmio->address_length);
> > + return AE_ERROR;
> > + }
> > + fw_cfg_phys_base = mmio->address;
> > + fw_cfg_phys_size = mmio->address_length;
> > + fw_cfg_is_mmio = true;
> > + /* set register addresses (arm offsets) */
> > + fw_cfg_reg_ctrl = fw_cfg_dev_base + 0x08;
> > + fw_cfg_reg_data = fw_cfg_dev_base + 0x00;
> > + return AE_OK;
> > + default:
> > + return AE_ERROR;
> > + }
> > +}
> > +
> > +/* initialize fw_cfg device i/o from ACPI data */
> > +static int fw_cfg_acpi_init(struct acpi_device *dev)
> > {
> > char sig[FW_CFG_SIG_SIZE];
> > + acpi_status status;
> > + int err;
> >
> > - for (fw_cfg_mode = &fw_cfg_modes[0];
> > - fw_cfg_mode->base; fw_cfg_mode++) {
> > -
> > - phys_addr_t base = fw_cfg_mode->base;
> > - u8 size = fw_cfg_mode->size;
> > -
> > - /* reserve and map mmio or ioport region */
> > - if (fw_cfg_mode->is_mmio) {
> > - if (!request_mem_region(base, size, fw_cfg_mode->name))
> > - continue;
> > - fw_cfg_dev_base = ioremap(base, size);
> > - if (!fw_cfg_dev_base) {
> > - release_mem_region(base, size);
> > - continue;
> > - }
> > - } else {
> > - if (!request_region(base, size, fw_cfg_mode->name))
> > - continue;
> > - fw_cfg_dev_base = ioport_map(base, size);
> > - if (!fw_cfg_dev_base) {
> > - release_region(base, size);
> > - continue;
> > - }
> > - }
> > + err = acpi_bus_get_status(dev);
> > + if (err < 0)
> > + return err;
> >
> > - /* set control and data register addresses */
> > - fw_cfg_reg_ctrl = fw_cfg_dev_base + fw_cfg_mode->ctrl_offset;
> > - fw_cfg_reg_data = fw_cfg_dev_base + fw_cfg_mode->data_offset;
> > + if (!(dev->status.enabled && dev->status.functional))
> > + return -ENODEV;
> >
> > - /* verify fw_cfg device signature */
> > - fw_cfg_read_blob(FW_CFG_SIGNATURE, sig, 0, FW_CFG_SIG_SIZE);
> > - if (memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) == 0)
> > - /* success, we're done */
> > - return 0;
> > + /* extract device i/o details from _CRS */
> > + status = acpi_walk_resources(dev->handle, METHOD_NAME__CRS,
> > + fw_cfg_walk_crs, NULL);
> > + if (status != AE_OK || !fw_cfg_phys_base)
> > + return -ENODEV;
> >
> > - /* clean up before probing next access mode */
> > + /* verify fw_cfg device signature */
> > + fw_cfg_read_blob(FW_CFG_SIGNATURE, sig, 0, FW_CFG_SIG_SIZE);
> > + if (memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
> > fw_cfg_io_cleanup();
> > + return -ENODEV;
> > }
> >
> > - return -ENODEV;
> > + return 0;
> > }
> >
> > /* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */
> > @@ -353,7 +351,7 @@ static struct kobject *fw_cfg_top_ko;
> > static struct kobject *fw_cfg_sel_ko;
> >
> > /* callback function to register an individual fw_cfg file */
> > -static int __init fw_cfg_register_file(const struct fw_cfg_file *f)
> > +static int fw_cfg_register_file(const struct fw_cfg_file *f)
> > {
> > int err;
> > struct fw_cfg_sysfs_entry *entry;
> > @@ -397,12 +395,12 @@ static inline void fw_cfg_kobj_cleanup(struct kobject *kobj)
> > kobject_put(kobj);
> > }
> >
> > -static int __init fw_cfg_sysfs_init(void)
> > +static int fw_cfg_sysfs_add(struct acpi_device *dev)
> > {
> > int err;
> >
> > - /* probe for the fw_cfg "hardware" */
> > - err = fw_cfg_io_probe();
> > + /* initialize fw_cfg device i/o from ACPI data */
> > + err = fw_cfg_acpi_init(dev);
> > if (err)
> > return err;
> >
> > @@ -443,14 +441,31 @@ err_top:
> > return err;
> > }
> >
> > -static void __exit fw_cfg_sysfs_exit(void)
> > +static int fw_cfg_sysfs_remove(struct acpi_device *dev)
> > {
> > pr_debug("fw_cfg: unloading.\n");
> > fw_cfg_sysfs_cache_cleanup();
> > fw_cfg_kobj_cleanup(fw_cfg_sel_ko);
> > fw_cfg_kobj_cleanup(fw_cfg_top_ko);
> > fw_cfg_io_cleanup();
> > + return 0;
> > }
> >
> > -module_init(fw_cfg_sysfs_init);
> > -module_exit(fw_cfg_sysfs_exit);
> > +static const struct acpi_device_id fw_cfg_sysfs_device_ids[] = {
> > + { "QEMU0002", 0 },
> > + { "", 0 },
> > +};
> > +MODULE_DEVICE_TABLE(acpi, fw_cfg_sysfs_device_ids);
> > +
> > +static struct acpi_driver fw_cfg_sysfs_driver = {
> > + .name = "fw_cfg",
> > + .class = "QEMU",
> > + .ids = fw_cfg_sysfs_device_ids,
> > + .ops = {
> > + .add = fw_cfg_sysfs_add,
> > + .remove = fw_cfg_sysfs_remove,
> > + },
> > + .owner = THIS_MODULE,
> > +};
> > +
> > +module_acpi_driver(fw_cfg_sysfs_driver);
> > --
> > 2.4.3
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/