RE: [PATCH 3/3] Xen physical cpus interface (V2)

From: Liu, Jinsong
Date: Fri May 11 2012 - 13:28:38 EST


Konrad Rzeszutek Wilk wrote:
>> +static void pcpu_sys_remove(struct pcpu *pcpu)
>> +{
>> + struct device *dev;
>> +
>> + if (!pcpu)
>> + return;
>> +
>> + dev = &pcpu->dev;
>> + if (dev->id)
>> + device_remove_file(dev, &dev_attr_online);
>> +
>> + device_unregister(dev);
>> +}
>> +
>> +static void free_pcpu(struct pcpu *pcpu)
>
> 1) So this isn't just freeing the PCPU, it also unregisters
> the SysFS.
>
> As such you should call this differently:
>
> unregister_and_free(struct pcpu *pcpu)

Fine, rename 2 funcs:
init_pcpu --> int_and_register_pcpu
free_pcpu --> unregister_and_free_pcpu

>
> or do the unregistration part seperatly.
>
> 2) But there is also another issue - a possiblity of race.
>
> The user might be running:
> while (true)
> do
> echo 1 > online
> echo 0 > online
> done
>
> and the the sync_pcpu will end up calling pcpu_sys_remove and
> free the pcpu. The SysFS aren't deleted until all of the

No race here. echo x > online would not change cpu present map.

> object references (kref's) have been put. So when you get to
> 'kfree(pcpu)' you might be still holding a reference (meaning
> the user is doing 'echo 0 > online') and crash.
>
> I think you need to hold the mutex in the store_online() function
> (not good), or better yet, make a device_release function
> that will be called when the last kref has been put.
>
>
> 3) Third the name. These functions all depend on the mutex lock being
> held. As such add a postfix to the name of the function of
> "_locked" or a prefix of "__' so it is known that the caller holds
> the mutex.

OK, add _locked postfix to these functions.

Thanks,
Jinsong

>
>> +{
>> + if (!pcpu)
>> + return;
>> +
>> + pcpu_sys_remove(pcpu);
>> +
>> + list_del(&pcpu->list);
>> + kfree(pcpu);
>> +}
>> +
>> +static int pcpu_sys_create(struct pcpu *pcpu)
>> +{
>> + struct device *dev;
>> + int err = -EINVAL;
>> +
>> + if (!pcpu)
>> + return err;
>> +
>> + dev = &pcpu->dev;
>> + dev->bus = &xen_pcpu_subsys;
>> + dev->id = pcpu->cpu_id;
>> +
>> + err = device_register(dev);
>> + if (err)
>> + return err;
>> +
>> + /*
>> + * Xen never offline cpu0 due to several restrictions
>> + * and assumptions. This basically doesn't add a sys control
>> + * to user, one cannot attempt to offline BSP.
>> + */
>> + if (dev->id) {
>> + err = device_create_file(dev, &dev_attr_online); + if (err) {
>> + device_unregister(dev);
>> + return err;
>> + }
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static struct pcpu *init_pcpu(struct xenpf_pcpuinfo *info) +{
>> + struct pcpu *pcpu;
>> + int err;
>> +
>> + if (info->flags & XEN_PCPU_FLAGS_INVALID)
>> + return ERR_PTR(-ENODEV);
>> +
>> + pcpu = kzalloc(sizeof(struct pcpu), GFP_KERNEL);
>> + if (!pcpu)
>> + return ERR_PTR(-ENOMEM);
>> +
>> + INIT_LIST_HEAD(&pcpu->list);
>> + pcpu->cpu_id = info->xen_cpuid;
>> + pcpu->flags = info->flags;
>> +
>> + err = pcpu_sys_create(pcpu);
>> + if (err) {
>> + pr_warning(XEN_PCPU "Failed to register pcpu%u\n", +
>> info->xen_cpuid); + kfree(pcpu);
>> + return ERR_PTR(-ENOENT);
>> + }
>> +
>> + /* Need hold on xen_pcpu_lock before pcpu list manipulations */
>> + list_add_tail(&pcpu->list, &xen_pcpus);
>> + return pcpu;
>> +}
>> +
>> +/*
>> + * Caller should hold the xen_pcpu_lock
>> + */
>> +static int sync_pcpu(uint32_t cpu, uint32_t *max_cpu) +{
>> + int ret;
>> + struct pcpu *pcpu = NULL;
>> + struct xenpf_pcpuinfo *info;
>> + struct xen_platform_op op = {
>> + .cmd = XENPF_get_cpuinfo,
>> + .interface_version = XENPF_INTERFACE_VERSION,
>> + .u.pcpu_info.xen_cpuid = cpu,
>> + };
>> +
>> + ret = HYPERVISOR_dom0_op(&op);
>> + if (ret)
>> + return ret;
>> +
>> + info = &op.u.pcpu_info;
>> + if (max_cpu)
>> + *max_cpu = info->max_present;
>> +
>> + pcpu = get_pcpu(cpu);
>> +
>> + if (info->flags & XEN_PCPU_FLAGS_INVALID) {
>> + /* The pcpu has been removed */
>> + if (pcpu)
>> + free_pcpu(pcpu);
>> + return 0;
>> + }
>> +
>> + if (!pcpu) {
>> + pcpu = init_pcpu(info);
>> + if (IS_ERR_OR_NULL(pcpu))
>> + return -ENODEV;
>> + } else
>> + pcpu_online_status(info, pcpu);
>> +
>> + return 0;
>> +}
>> +
>> +/*
>> + * Sync dom0's pcpu information with xen hypervisor's + */
>> +static int xen_sync_pcpus(void)
>> +{
>> + /*
>> + * Boot cpu always have cpu_id 0 in xen
>> + */
>> + uint32_t cpu = 0, max_cpu = 0;
>> + int err = 0;
>> + struct list_head *cur, *tmp;
>> + struct pcpu *pcpu;
>> +
>> + mutex_lock(&xen_pcpu_lock);
>> +
>> + while (!err && (cpu <= max_cpu)) {
>> + err = sync_pcpu(cpu, &max_cpu);
>> + cpu++;
>> + }
>> +
>> + if (err) {
>> + list_for_each_safe(cur, tmp, &xen_pcpus) {
>> + pcpu = list_entry(cur, struct pcpu, list);
>> + free_pcpu(pcpu);
>> + }
>> + }
>> +
>> + mutex_unlock(&xen_pcpu_lock);
>> +
>> + return err;
>> +}
>> +
>> +static void xen_pcpu_work_fn(struct work_struct *work) +{
>> + xen_sync_pcpus();
>> +}
>> +static DECLARE_WORK(xen_pcpu_work, xen_pcpu_work_fn); +
>> +static irqreturn_t xen_pcpu_interrupt(int irq, void *dev_id) +{
>> + schedule_work(&xen_pcpu_work);
>> + return IRQ_HANDLED;
>> +}
>> +
>> +static int __init xen_pcpu_init(void)
>> +{
>> + int irq, ret;
>> +
>> + if (!xen_initial_domain())
>> + return -ENODEV;
>> +
>> + irq = bind_virq_to_irqhandler(VIRQ_PCPU_STATE, 0,
>> + xen_pcpu_interrupt, 0,
>> + "xen-pcpu", NULL);
>> + if (irq < 0) {
>> + pr_warning(XEN_PCPU "Failed to bind pcpu virq\n"); + return irq;
>> + }
>> +
>> + ret = subsys_system_register(&xen_pcpu_subsys, NULL); + if (ret) {
>> + pr_warning(XEN_PCPU "Failed to register pcpu subsys\n"); + goto
>> err1; + }
>> +
>> + ret = xen_sync_pcpus();
>> + if (ret) {
>> + pr_warning(XEN_PCPU "Failed to sync pcpu info\n"); + goto err2;
>> + }
>> +
>> + return 0;
>> +
>> +err2:
>> + bus_unregister(&xen_pcpu_subsys);
>> +err1:
>> + unbind_from_irqhandler(irq, NULL);
>> + return ret;
>> +}
>> +arch_initcall(xen_pcpu_init);
>> diff --git a/include/xen/interface/platform.h
>> b/include/xen/interface/platform.h index 486653f..61fa661 100644 ---
>> a/include/xen/interface/platform.h +++
>> b/include/xen/interface/platform.h @@ -314,6 +314,13 @@ struct
>> xenpf_pcpuinfo { };
>> DEFINE_GUEST_HANDLE_STRUCT(xenpf_pcpuinfo);
>>
>> +#define XENPF_cpu_online 56
>> +#define XENPF_cpu_offline 57
>> +struct xenpf_cpu_ol {
>> + uint32_t cpuid;
>> +};
>> +DEFINE_GUEST_HANDLE_STRUCT(xenpf_cpu_ol);
>> +
>> struct xen_platform_op {
>> uint32_t cmd;
>> uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
>> @@ -330,6 +337,7 @@ struct xen_platform_op {
>> struct xenpf_getidletime getidletime;
>> struct xenpf_set_processor_pminfo set_pminfo;
>> struct xenpf_pcpuinfo pcpu_info;
>> + struct xenpf_cpu_ol cpu_ol;
>> uint8_t pad[128];
>> } u;
>> };
>> diff --git a/include/xen/interface/xen.h
>> b/include/xen/interface/xen.h
>> index a890804..0801468 100644
>> --- a/include/xen/interface/xen.h
>> +++ b/include/xen/interface/xen.h
>> @@ -80,6 +80,7 @@
>> #define VIRQ_CONSOLE 2 /* (DOM0) Bytes received on emergency
>> console. */ #define VIRQ_DOM_EXC 3 /* (DOM0) Exceptional event
>> for some domain. */ #define VIRQ_DEBUGGER 6 /* (DOM0) A domain
>> has paused for debugging. */ +#define VIRQ_PCPU_STATE 9 /* (DOM0)
>> PCPU state changed */
>>
>> /* Architecture-specific VIRQ definitions. */
>> #define VIRQ_ARCH_0 16
>> --
>> 1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/