Re: [PATCH 2/2] misc: Register a PPI for the vcpu stall detection virtual device

From: Sebastian Ene
Date: Tue Jun 11 2024 - 06:49:45 EST


On Fri, May 24, 2024 at 08:00:42PM +0100, Conor Dooley wrote:
> On Thu, May 23, 2024 at 04:04:13PM +0000, Sebastian Ene wrote:
> > Request a PPI for each vCPU during probe which will be used by the host
> > to communicate a stall detected event on the vCPU. When the host raises
> > this interrupt from the virtual machine monitor, the guest is expected to
> > handle the interrupt and panic.
> >
> > Signed-off-by: Sebastian Ene <sebastianene@xxxxxxxxxx>
> > ---
> > drivers/misc/vcpu_stall_detector.c | 41 ++++++++++++++++++++++++++++--
> > 1 file changed, 39 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/misc/vcpu_stall_detector.c b/drivers/misc/vcpu_stall_detector.c
> > index e2015c87f03f..c580cd7fd225 100644
> > --- a/drivers/misc/vcpu_stall_detector.c
> > +++ b/drivers/misc/vcpu_stall_detector.c
> > @@ -32,6 +32,7 @@
> > struct vcpu_stall_detect_config {
> > u32 clock_freq_hz;
> > u32 stall_timeout_sec;
> > + int ppi_irq;
> >
> > void __iomem *membase;
> > struct platform_device *dev;
> > @@ -77,6 +78,12 @@ vcpu_stall_detect_timer_fn(struct hrtimer *hrtimer)
> > return HRTIMER_RESTART;
> > }
> >
> > +static irqreturn_t vcpu_stall_detector_irq(int irq, void *dev)
> > +{
> > + panic("vCPU stall detector");
> > + return IRQ_HANDLED;
> > +}
> > +
> > static int start_stall_detector_cpu(unsigned int cpu)
> > {
> > u32 ticks, ping_timeout_ms;
> > @@ -132,7 +139,7 @@ static int stop_stall_detector_cpu(unsigned int cpu)
> >
> > static int vcpu_stall_detect_probe(struct platform_device *pdev)
> > {
> > - int ret;
> > + int ret, irq, num_irqs;
> > struct resource *r;
> > void __iomem *membase;
> > u32 clock_freq_hz = VCPU_STALL_DEFAULT_CLOCK_HZ;
> > @@ -169,9 +176,32 @@ static int vcpu_stall_detect_probe(struct platform_device *pdev)
> > vcpu_stall_config = (struct vcpu_stall_detect_config) {
> > .membase = membase,
> > .clock_freq_hz = clock_freq_hz,
> > - .stall_timeout_sec = stall_timeout_sec
> > + .stall_timeout_sec = stall_timeout_sec,
> > + .ppi_irq = -1,
> > };
> >
> > + num_irqs = platform_irq_count(pdev);
> > + if (num_irqs < 0) {
> > + dev_err(&pdev->dev, "Failed to get irqs\n");

Hello Conor,


>
> platform_irq_count() either returns a number or EPROBE_DEFER, I don't
> think emitting an error on deferred probe is the correct thing to do
> here?

I will drop this.


> > + ret = num_irqs;
> > + goto err;
> > + } else if (num_irqs > 1) {
> > + dev_err(&pdev->dev, "Multipple irqs detected\n");
>
> Typo. I don't really see why you're going to this level of complexity
> though, why aren't you just doing a single get_irq_optional()?
>

Thanks for the feedback. I simplified it by using
platform_get_irq_optional(), as you suggested.


> > + ret = -EINVAL;
> > + goto err;
> > + } else if (num_irqs == 1) {
> > + irq = platform_get_irq(pdev, 0);
> > + if ((irq > 0) && irq_is_percpu_devid(irq)) {
> > + ret = request_percpu_irq(irq,
> > + vcpu_stall_detector_irq,
> > + "vcpu_stall_detector",
> > + vcpu_stall_detectors);
> > + if (!ret)
> > + vcpu_stall_config.ppi_irq = irq;
> > +
> > + }
> > + }
> > +
> > ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
> > "virt/vcpu_stall_detector:online",
> > start_stall_detector_cpu,
> > @@ -184,6 +214,9 @@ static int vcpu_stall_detect_probe(struct platform_device *pdev)
> > vcpu_stall_config.hp_online = ret;
> > return 0;
> > err:
> > + if (vcpu_stall_config.ppi_irq > 0)
> > + free_percpu_irq(vcpu_stall_config.ppi_irq,
> > + vcpu_stall_detectors);
> > return ret;
> > }
> >
> > @@ -193,6 +226,10 @@ static void vcpu_stall_detect_remove(struct platform_device *pdev)
> >
> > cpuhp_remove_state(vcpu_stall_config.hp_online);
> >
> > + if (vcpu_stall_config.ppi_irq > 0)
> > + free_percpu_irq(vcpu_stall_config.ppi_irq,
> > + vcpu_stall_detectors);
> > +
> > for_each_possible_cpu(cpu)
> > stop_stall_detector_cpu(cpu);
> > }
> > --
> > 2.45.1.288.g0e0cd299f1-goog
> >
> >

Cheers,
Seb