Re: Issue with panic handling and ipmi

From: Corey Minyard
Date: Fri Sep 17 2021 - 08:08:20 EST


On Fri, Sep 17, 2021 at 12:14:19PM +0200, Anton Lundin wrote:
> On 16 September, 2021 - Corey Minyard wrote:
>
> > On Thu, Sep 16, 2021 at 04:53:00PM +0200, Anton Lundin wrote:
> > > Hi.
> > >
> > > I've just done a upgrade of the kernel we're using in a product from
> > > 4.19 to 5.10 and I noted a issue.
> > >
> > > It started that with that we didn't get panic and oops dumps in our erst
> > > backed pstore, and when debugging that I noted that the reboot on panic
> > > timer didn't work either.
> > >
> > > I've bisected it down to 2033f6858970 ("ipmi: Free receive messages when
> > > in an oops").
> >
> > Hmm. Unfortunately removing that will break other things. Can you try
> > the following patch? It's a good idea, in general, to do as little as
> > possible in the panic path, this should cover a multitude of issues.
> >
> > Thanks for the report.
> >
>
> I'm sorry to report that the patch didn't solve the issue, and the
> machine locked up in the panic path as before.

I missed something. Can you try the following? If this doesn't work,
I'm going to have to figure out how to reproduce this.

Thanks,

-corey

commit f253c87772b65e2a5971e82dc81ee63d6e9848cf
Author: Corey Minyard <cminyard@xxxxxxxxxx>
Date: Thu Sep 16 11:36:20 2021 -0500

ipmi: Disable some operations during a panic

Don't do kfree or other risky things when oops_in_progress is set.

Reported-by: Anton Lundin <glance@xxxxxxxxxx>
Fixes: 2033f6858970 ("ipmi: Free receive messages when in an oops")
Signed-off-by: Corey Minyard <cminyard@xxxxxxxxxx>

diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index e96cb5c4f97a..a08f53f208bf 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -4789,7 +4789,9 @@ static atomic_t recv_msg_inuse_count = ATOMIC_INIT(0);
static void free_smi_msg(struct ipmi_smi_msg *msg)
{
atomic_dec(&smi_msg_inuse_count);
- kfree(msg);
+ /* Try to keep as much stuff out of the panic path as possible. */
+ if (!oops_in_progress)
+ kfree(msg);
}

struct ipmi_smi_msg *ipmi_alloc_smi_msg(void)
@@ -4808,7 +4810,9 @@ EXPORT_SYMBOL(ipmi_alloc_smi_msg);
static void free_recv_msg(struct ipmi_recv_msg *msg)
{
atomic_dec(&recv_msg_inuse_count);
- kfree(msg);
+ /* Try to keep as much stuff out of the panic path as possible. */
+ if (!oops_in_progress)
+ kfree(msg);
}

static struct ipmi_recv_msg *ipmi_alloc_recv_msg(void)
@@ -4826,7 +4830,7 @@ static struct ipmi_recv_msg *ipmi_alloc_recv_msg(void)

void ipmi_free_recv_msg(struct ipmi_recv_msg *msg)
{
- if (msg->user)
+ if (msg->user && !oops_in_progress)
kref_put(&msg->user->refcount, free_user);
msg->done(msg);
}
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index e4ff3b50de7f..7f71471c7a46 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -342,13 +342,17 @@ static atomic_t msg_tofree = ATOMIC_INIT(0);
static DECLARE_COMPLETION(msg_wait);
static void msg_free_smi(struct ipmi_smi_msg *msg)
{
- if (atomic_dec_and_test(&msg_tofree))
- complete(&msg_wait);
+ if (atomic_dec_and_test(&msg_tofree)) {
+ if (!oops_in_progress)
+ complete(&msg_wait);
+ }
}
static void msg_free_recv(struct ipmi_recv_msg *msg)
{
- if (atomic_dec_and_test(&msg_tofree))
- complete(&msg_wait);
+ if (atomic_dec_and_test(&msg_tofree)) {
+ if (!oops_in_progress)
+ complete(&msg_wait);
+ }
}
static struct ipmi_smi_msg smi_msg = {
.done = msg_free_smi
@@ -434,8 +438,10 @@ static int _ipmi_set_timeout(int do_heartbeat)
rv = __ipmi_set_timeout(&smi_msg,
&recv_msg,
&send_heartbeat_now);
- if (rv)
+ if (rv) {
+ atomic_set(&msg_tofree, 0);
return rv;
+ }

wait_for_completion(&msg_wait);

@@ -580,6 +586,7 @@ static int __ipmi_heartbeat(void)
&recv_msg,
1);
if (rv) {
+ atomic_set(&msg_tofree, 0);
pr_warn("heartbeat send failure: %d\n", rv);
return rv;
}