[RFC 2/2] cxgb4: collect hardware dump in kernel panic

From: Rahul Lakkireddy
Date: Fri Mar 02 2018 - 07:22:13 EST


Pre-allocate a dump buffer and register a panic notifier callback to collect
hardware/firmware logs on kernel panic. Free the dump buffer on driver unload.

Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@xxxxxxxxxxx>
Signed-off-by: Ganesh Goudar <ganeshgr@xxxxxxxxxxx>
---
drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 6 ++
drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c | 95 +++++++++++++++++++++++-
drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h | 4 +
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 12 +++
4 files changed, 113 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index d3fa53db61ee..21d095668374 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -50,6 +50,7 @@
#include <linux/net_tstamp.h>
#include <linux/ptp_clock_kernel.h>
#include <linux/ptp_classify.h>
+#include <linux/crash_core.h>
#include <asm/io.h>
#include "t4_chip_type.h"
#include "cxgb4_uld.h"
@@ -568,6 +569,7 @@ enum { /* adapter flags */
FW_OFLD_CONN = (1 << 9),
ROOT_NO_RELAXED_ORDERING = (1 << 10),
SHUTTING_DOWN = (1 << 11),
+ K_CRASH = (1 << 12),
};

enum {
@@ -946,6 +948,10 @@ struct adapter {

/* Ethtool Dump */
struct ethtool_dump eth_dump;
+
+ /* Dump buffer for collecting logs in panic */
+ struct crash_driver_dump dump_buf;
+ struct notifier_block panic_nb;
};

/* Support for "sched-class" command to allow a TX Scheduling Class to be
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
index 143686c60234..c10d5e88321f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
@@ -383,13 +383,25 @@ static void cxgb4_cudbg_collect_entity(struct cudbg_init *pdbg_init,

static int cudbg_alloc_compress_buff(struct cudbg_init *pdbg_init)
{
+ struct adapter *adap = pdbg_init->adap;
u32 workspace_size;

workspace_size = cudbg_get_workspace_size();
- pdbg_init->compress_buff = vzalloc(CUDBG_COMPRESS_BUFF_SIZE +
- workspace_size);
- if (!pdbg_init->compress_buff)
- return -ENOMEM;
+
+ if (adap->flags & K_CRASH) {
+ /* In panic scenario, the compression buffer is already
+ * allocated. So, just update accordingly.
+ */
+ pdbg_init->compress_buff = (u8 *)adap->dump_buf.buf +
+ adap->dump_buf.size -
+ workspace_size -
+ CUDBG_COMPRESS_BUFF_SIZE;
+ } else {
+ pdbg_init->compress_buff = vzalloc(CUDBG_COMPRESS_BUFF_SIZE +
+ workspace_size);
+ if (!pdbg_init->compress_buff)
+ return -ENOMEM;
+ }

pdbg_init->compress_buff_size = CUDBG_COMPRESS_BUFF_SIZE;
pdbg_init->workspace = (u8 *)pdbg_init->compress_buff +
@@ -399,6 +411,14 @@ static int cudbg_alloc_compress_buff(struct cudbg_init *pdbg_init)

static void cudbg_free_compress_buff(struct cudbg_init *pdbg_init)
{
+ struct adapter *adap = pdbg_init->adap;
+
+ /* Don't free in panic scenario. We need the buffer to be present
+ * in vmcore so that we can extract the dump. With K_CRASH set,
+ * cudbg_alloc_compress_buff() points compress_buff into
+ * adap->dump_buf.buf instead of calling vzalloc(), so there is
+ * no separate allocation to vfree() here.
+ */
+ if (adap->flags & K_CRASH)
+ return;
+
if (pdbg_init->compress_buff)
vfree(pdbg_init->compress_buff);
}
@@ -488,3 +508,70 @@ void cxgb4_init_ethtool_dump(struct adapter *adapter)
adapter->eth_dump.version = adapter->params.fw_vers;
adapter->eth_dump.len = 0;
}
+
+/**
+ * cxgb4_panic_notify - panic notifier that snapshots adapter debug state
+ * @this: notifier block embedded in struct adapter as panic_nb
+ * @event: notifier event code (unused)
+ * @ptr: notifier payload (unused)
+ *
+ * Sets K_CRASH in adap->flags, forces adap->use_bd to true for the duration
+ * of the collection and calls cxgb4_cudbg_collect() with CXGB4_ETH_DUMP_ALL
+ * to fill the pre-allocated adap->dump_buf. The previous use_bd value is
+ * restored afterwards; K_CRASH is left set.
+ *
+ * Return: always NOTIFY_DONE, whether or not the collection succeeded.
+ */
+static int cxgb4_panic_notify(struct notifier_block *this, unsigned long event,
+			      void *ptr)
+{
+	struct adapter *adap = container_of(this, struct adapter, panic_nb);
+	bool use_bd;
+	u32 len;
+	int ret;
+
+	/* Save original value and restore after collection */
+	use_bd = adap->use_bd;
+
+	dev_info(adap->pdev_dev, "Initialized cxgb4 debug collection...\n");
+	adap->flags |= K_CRASH;
+
+	/* Don't contact firmware. Directly access registers */
+	adap->use_bd = true;
+
+	len = adap->dump_buf.size;
+	ret = cxgb4_cudbg_collect(adap, adap->dump_buf.buf, &len,
+				  CXGB4_ETH_DUMP_ALL);
+	/* Report a failed collection instead of claiming it completed */
+	if (ret)
+		dev_warn(adap->pdev_dev,
+			 "cxgb4 debug collection failed, err: %d\n", ret);
+	else
+		dev_info(adap->pdev_dev, "cxgb4 debug collection done...\n");
+
+	/* Restore original value */
+	adap->use_bd = use_bd;
+	return NOTIFY_DONE;
+}
+
+/**
+ * cxgb4_cudbg_register_crash_dump - pre-allocate and register the panic dump
+ * @adap: adapter whose dump_buf and panic_nb are initialised
+ *
+ * The buffer is sized as one struct cudbg_hdr, CUDBG_MAX_ENTITY entity
+ * headers and CUDBG_DUMP_BUFF_SIZE bytes of payload. When
+ * cudbg_get_workspace_size() is non-zero, the workspace plus
+ * CUDBG_COMPRESS_BUFF_SIZE are appended so that no allocation is needed at
+ * panic time. The buffer and cxgb4_panic_notify() are then handed to
+ * crash_driver_dump_register().
+ *
+ * Return: 0 on success, -ENOMEM if the buffer cannot be allocated, or the
+ * error returned by crash_driver_dump_register(). On every failure path
+ * adap->dump_buf.buf is left NULL and adap->dump_buf.size is 0, so a later
+ * cxgb4_cudbg_unregister_crash_dump() does nothing.
+ */
+int cxgb4_cudbg_register_crash_dump(struct adapter *adap)
+{
+	u32 wsize, len;
+	int ret;
+
+	len = sizeof(struct cudbg_hdr) +
+	      sizeof(struct cudbg_entity_hdr) * CUDBG_MAX_ENTITY;
+	len += CUDBG_DUMP_BUFF_SIZE;
+
+	/* If compression is enabled, allocate extra memory needed for
+	 * compression too.
+	 */
+	wsize = cudbg_get_workspace_size();
+	if (wsize)
+		wsize += CUDBG_COMPRESS_BUFF_SIZE;
+
+	adap->dump_buf.size = len + wsize;
+	adap->dump_buf.buf = vzalloc(adap->dump_buf.size);
+	if (!adap->dump_buf.buf) {
+		ret = -ENOMEM;
+		goto err_reset;
+	}
+
+	/* Bounded copy so a long adapter name cannot overrun the field.
+	 * NOTE(review): assumes dump_buf.name is a fixed-size char array
+	 * inside struct crash_driver_dump - confirm against its declaration.
+	 */
+	snprintf(adap->dump_buf.name, sizeof(adap->dump_buf.name),
+		 "cxgb4_%s", adap->name);
+	adap->panic_nb.notifier_call = cxgb4_panic_notify;
+	adap->panic_nb.priority = INT_MAX;
+
+	ret = crash_driver_dump_register(&adap->dump_buf, &adap->panic_nb);
+	if (ret)
+		goto err_free;
+
+	return 0;
+
+err_free:
+	vfree(adap->dump_buf.buf);
+	/* The caller keeps going after a failure, so do not leave a dangling
+	 * pointer behind: cxgb4_cudbg_unregister_crash_dump() keys off
+	 * dump_buf.buf and would otherwise unregister and free it again.
+	 */
+	adap->dump_buf.buf = NULL;
+err_reset:
+	adap->dump_buf.size = 0;
+	return ret;
+}
+
+/* Tear down the panic dump set up by cxgb4_cudbg_register_crash_dump():
+ * unregister adap->dump_buf and release its buffer. Does nothing when no
+ * dump buffer is attached to the adapter.
+ */
+void cxgb4_cudbg_unregister_crash_dump(struct adapter *adap)
+{
+	if (!adap->dump_buf.buf)
+		return;
+
+	crash_driver_dump_unregister(&adap->dump_buf);
+	vfree(adap->dump_buf.buf);
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h
index ce1ac9a1c878..79261313a350 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h
@@ -41,8 +41,12 @@ enum CXGB4_ETHTOOL_DUMP_FLAGS {
CXGB4_ETH_DUMP_HW = (1 << 1), /* various FW and HW dumps */
};

+#define CXGB4_ETH_DUMP_ALL (CXGB4_ETH_DUMP_MEM | CXGB4_ETH_DUMP_HW)
+
u32 cxgb4_get_dump_length(struct adapter *adap, u32 flag);
int cxgb4_cudbg_collect(struct adapter *adap, void *buf, u32 *buf_size,
u32 flag);
void cxgb4_init_ethtool_dump(struct adapter *adapter);
+int cxgb4_cudbg_register_crash_dump(struct adapter *adap);
+void cxgb4_cudbg_unregister_crash_dump(struct adapter *adap);
#endif /* __CXGB4_CUDBG_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 7b452e85de2a..64eeffe0ba45 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -5291,6 +5291,16 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
}

setup_memwin(adapter);
+
+ /* Register panic notifier */
+ err = cxgb4_cudbg_register_crash_dump(adapter);
+ if (err) {
+ dev_warn(adapter->pdev_dev,
+ "Fail registering panic notifier, err: %d. Continuing\n",
+ err);
+ err = 0;
+ }
+
err = adap_init0(adapter);
#ifdef CONFIG_DEBUG_FS
bitmap_zero(adapter->sge.blocked_fl, adapter->sge.egr_sz);
@@ -5538,6 +5548,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
destroy_workqueue(adapter->workq);

kfree(adapter->mbox_log);
+ cxgb4_cudbg_unregister_crash_dump(adapter);
kfree(adapter);
out_unmap_bar0:
iounmap(regs);
@@ -5617,6 +5628,7 @@ static void remove_one(struct pci_dev *pdev)
pci_release_regions(pdev);
kfree(adapter->mbox_log);
synchronize_rcu();
+ cxgb4_cudbg_unregister_crash_dump(adapter);
kfree(adapter);
}

--
2.14.1