[PATCH v2 12/12] scsi: ufs: Improve fatal error logs

From: Subhash Jadavani
Date: Tue Dec 13 2016 - 14:53:05 EST


From: Dolev Raviv <draviv@xxxxxxxxxxxxxx>

Errors such as UIC error, illegal OCS values, and others may require
more information for debugging. Such information could be hibern8 events,
events sequences, recoverable errors, error history, and more.
This patch improves tracking of important errors and events in debug level
to be enabled when debugging a such issues. It includes:
* UIC error history
* Successful hibern8 events
* Successful command after hibern8 exit
* Clk-freq info
* Failed device command
* Infrastructure for dumping host controller debug information

Signed-off-by: Dolev Raviv <draviv@xxxxxxxxxxxxxx>
Signed-off-by: Subhash Jadavani <subhashj@xxxxxxxxxxxxxx>
---
Changes v1 -> v2:
- Added explicit new line character at the end of the printk messages.
---
drivers/scsi/ufs/ufshcd.c | 203 ++++++++++++++++++++++++++++++++++++----------
drivers/scsi/ufs/ufshcd.h | 47 +++++++++++
2 files changed, 208 insertions(+), 42 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 926d1ef..5eb816c 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -346,6 +346,37 @@ static inline void ufshcd_cond_add_cmd_trace(struct ufs_hba *hba,
}
#endif

+static void ufshcd_print_clk_freqs(struct ufs_hba *hba)
+{
+ struct ufs_clk_info *clki;
+ struct list_head *head = &hba->clk_list_head;
+
+ if (!head || list_empty(head))
+ return;
+
+ list_for_each_entry(clki, head, list) {
+ if (!IS_ERR_OR_NULL(clki->clk) && clki->min_freq &&
+ clki->max_freq)
+ dev_err(hba->dev, "clk: %s, rate: %u\n",
+ clki->name, clki->curr_freq);
+ }
+}
+
+static void ufshcd_print_uic_err_hist(struct ufs_hba *hba,
+ struct ufs_uic_err_reg_hist *err_hist, char *err_name)
+{
+ int i;
+
+ for (i = 0; i < UIC_ERR_REG_HIST_LENGTH; i++) {
+ int p = (i + err_hist->pos - 1) % UIC_ERR_REG_HIST_LENGTH;
+
+ if (err_hist->reg[p] == 0)
+ continue;
+ dev_err(hba->dev, "%s[%d] = 0x%x at %lld us\n", err_name, i,
+ err_hist->reg[p], ktime_to_us(err_hist->tstamp[p]));
+ }
+}
+
static void ufshcd_print_host_regs(struct ufs_hba *hba)
{
/*
@@ -362,6 +393,21 @@ static void ufshcd_print_host_regs(struct ufs_hba *hba)
dev_err(hba->dev,
"hba->outstanding_reqs = 0x%x, hba->outstanding_tasks = 0x%x\n",
(u32)hba->outstanding_reqs, (u32)hba->outstanding_tasks);
+ dev_err(hba->dev,
+ "last_hibern8_exit_tstamp at %lld us, hibern8_exit_cnt = %d\n",
+ ktime_to_us(hba->ufs_stats.last_hibern8_exit_tstamp),
+ hba->ufs_stats.hibern8_exit_cnt);
+
+ ufshcd_print_uic_err_hist(hba, &hba->ufs_stats.pa_err, "pa_err");
+ ufshcd_print_uic_err_hist(hba, &hba->ufs_stats.dl_err, "dl_err");
+ ufshcd_print_uic_err_hist(hba, &hba->ufs_stats.nl_err, "nl_err");
+ ufshcd_print_uic_err_hist(hba, &hba->ufs_stats.tl_err, "tl_err");
+ ufshcd_print_uic_err_hist(hba, &hba->ufs_stats.dme_err, "dme_err");
+
+ ufshcd_print_clk_freqs(hba);
+
+ if (hba->vops && hba->vops->dbg_register_dump)
+ hba->vops->dbg_register_dump(hba);
}

static
@@ -373,22 +419,30 @@ void ufshcd_print_trs(struct ufs_hba *hba, unsigned long bitmap, bool pr_prdt)
for_each_set_bit(tag, &bitmap, hba->nutrs) {
lrbp = &hba->lrb[tag];

- dev_err(hba->dev, "UPIU[%d] - Transfer Request Descriptor\n",
- tag);
+ dev_err(hba->dev, "UPIU[%d] - issue time %lld us\n",
+ tag, ktime_to_us(lrbp->issue_time_stamp));
+ dev_err(hba->dev,
+ "UPIU[%d] - Transfer Request Descriptor phys@0x%llx\n",
+ tag, (u64)lrbp->utrd_dma_addr);
+
ufshcd_hex_dump("UPIU TRD: ", lrbp->utr_descriptor_ptr,
sizeof(struct utp_transfer_req_desc));
- dev_err(hba->dev, "UPIU[%d] - Request UPIU\n", tag);
+ dev_err(hba->dev, "UPIU[%d] - Request UPIU phys@0x%llx\n", tag,
+ (u64)lrbp->ucd_req_dma_addr);
ufshcd_hex_dump("UPIU REQ: ", lrbp->ucd_req_ptr,
sizeof(struct utp_upiu_req));
- dev_err(hba->dev, "UPIU[%d] - Response UPIU\n", tag);
+ dev_err(hba->dev, "UPIU[%d] - Response UPIU phys@0x%llx\n", tag,
+ (u64)lrbp->ucd_rsp_dma_addr);
ufshcd_hex_dump("UPIU RSP: ", lrbp->ucd_rsp_ptr,
sizeof(struct utp_upiu_rsp));
if (pr_prdt) {
int prdt_length = le16_to_cpu(
lrbp->utr_descriptor_ptr->prd_table_length);

- dev_err(hba->dev, "UPIU[%d] - PRDT - %d entries\n", tag,
- prdt_length);
+ dev_err(hba->dev,
+ "UPIU[%d] - PRDT - %d entries phys@0x%llx\n",
+ tag, prdt_length,
+ (u64)lrbp->ucd_prdt_dma_addr);
ufshcd_hex_dump("UPIU PRDT: ", lrbp->ucd_prdt_ptr,
sizeof(struct ufshcd_sg_entry) *
prdt_length);
@@ -417,6 +471,32 @@ static void ufshcd_print_tmrs(struct ufs_hba *hba, unsigned long bitmap)
}
}

+/**
+ * ufshcd_print_pwr_info - print power params as saved in hba
+ * power info
+ * @hba: per-adapter instance
+ */
+static void ufshcd_print_pwr_info(struct ufs_hba *hba)
+{
+ static const char * const names[] = {
+ "INVALID MODE",
+ "FAST MODE",
+ "SLOW_MODE",
+ "INVALID MODE",
+ "FASTAUTO_MODE",
+ "SLOWAUTO_MODE",
+ "INVALID MODE",
+ };
+
+ dev_err(hba->dev, "%s:[RX, TX]: gear=[%d, %d], lane[%d, %d], pwr[%s, %s], rate = %d\n",
+ __func__,
+ hba->pwr_info.gear_rx, hba->pwr_info.gear_tx,
+ hba->pwr_info.lane_rx, hba->pwr_info.lane_tx,
+ names[hba->pwr_info.pwr_rx],
+ names[hba->pwr_info.pwr_tx],
+ hba->pwr_info.hs_rate);
+}
+
/*
* ufshcd_wait_for_register - wait for register value to change
* @hba - per-adapter interface
@@ -1233,6 +1313,7 @@ static void ufshcd_clk_scaling_update_busy(struct ufs_hba *hba)
static inline
void ufshcd_send_command(struct ufs_hba *hba, unsigned int task_tag)
{
+ hba->lrb[task_tag].issue_time_stamp = ktime_get();
ufshcd_clk_scaling_start_busy(hba);
__set_bit(task_tag, &hba->outstanding_reqs);
ufshcd_writel(hba, 1 << task_tag, REG_UTP_TRANSFER_REQ_DOOR_BELL);
@@ -1914,6 +1995,7 @@ static int ufshcd_compose_dev_cmd(struct ufs_hba *hba,
int resp;
int err = 0;

+ hba->ufs_stats.last_hibern8_exit_tstamp = ktime_set(0, 0);
resp = ufshcd_get_req_rsp(lrbp->ucd_rsp_ptr);

switch (resp) {
@@ -2684,32 +2766,6 @@ static int ufshcd_memory_alloc(struct ufs_hba *hba)
}

/**
- * ufshcd_print_pwr_info - print power params as saved in hba
- * power info
- * @hba: per-adapter instance
- */
-static void ufshcd_print_pwr_info(struct ufs_hba *hba)
-{
- static const char * const names[] = {
- "INVALID MODE",
- "FAST MODE",
- "SLOW_MODE",
- "INVALID MODE",
- "FASTAUTO_MODE",
- "SLOWAUTO_MODE",
- "INVALID MODE",
- };
-
- dev_info(hba->dev, "%s:[RX, TX]: gear=[%d, %d], lane[%d, %d], pwr[%s, %s], rate = %d\n",
- __func__,
- hba->pwr_info.gear_rx, hba->pwr_info.gear_tx,
- hba->pwr_info.lane_rx, hba->pwr_info.lane_tx,
- names[hba->pwr_info.pwr_rx],
- names[hba->pwr_info.pwr_tx],
- hba->pwr_info.hs_rate);
-}
-
-/**
* ufshcd_host_memory_configure - configure local reference block with
* memory offsets
* @hba: per adapter instance
@@ -2771,12 +2827,19 @@ static void ufshcd_host_memory_configure(struct ufs_hba *hba)
}

hba->lrb[i].utr_descriptor_ptr = (utrdlp + i);
+ hba->lrb[i].utrd_dma_addr = hba->utrdl_dma_addr +
+ (i * sizeof(struct utp_transfer_req_desc));
hba->lrb[i].ucd_req_ptr =
(struct utp_upiu_req *)(cmd_descp + i);
+ hba->lrb[i].ucd_req_dma_addr = cmd_desc_element_addr;
hba->lrb[i].ucd_rsp_ptr =
(struct utp_upiu_rsp *)cmd_descp[i].response_upiu;
+ hba->lrb[i].ucd_rsp_dma_addr = cmd_desc_element_addr +
+ response_offset;
hba->lrb[i].ucd_prdt_ptr =
(struct ufshcd_sg_entry *)cmd_descp[i].prd_table;
+ hba->lrb[i].ucd_prdt_dma_addr = cmd_desc_element_addr +
+ prdt_offset;
}
}

@@ -2800,7 +2863,7 @@ static int ufshcd_dme_link_startup(struct ufs_hba *hba)

ret = ufshcd_send_uic_cmd(hba, &uic_cmd);
if (ret)
- dev_err(hba->dev,
+ dev_dbg(hba->dev,
"dme-link-startup: error code %d\n", ret);
return ret;
}
@@ -3150,9 +3213,12 @@ static int ufshcd_uic_hibern8_exit(struct ufs_hba *hba)
dev_err(hba->dev, "%s: hibern8 exit failed. ret = %d\n",
__func__, ret);
ret = ufshcd_link_recovery(hba);
- } else
+ } else {
ufshcd_vops_hibern8_notify(hba, UIC_CMD_DME_HIBER_EXIT,
POST_CHANGE);
+ hba->ufs_stats.last_hibern8_exit_tstamp = ktime_get();
+ hba->ufs_stats.hibern8_exit_cnt++;
+ }

return ret;
}
@@ -3922,7 +3988,7 @@ static int ufshcd_task_req_compl(struct ufs_hba *hba, u32 index, u8 *resp)
switch (ocs) {
case OCS_SUCCESS:
result = ufshcd_get_req_rsp(lrbp->ucd_rsp_ptr);
-
+ hba->ufs_stats.last_hibern8_exit_tstamp = ktime_set(0, 0);
switch (result) {
case UPIU_TRANSACTION_RESPONSE:
/*
@@ -3983,7 +4049,9 @@ static int ufshcd_task_req_compl(struct ufs_hba *hba, u32 index, u8 *resp)
default:
result |= DID_ERROR << 16;
dev_err(hba->dev,
- "OCS error from controller = %x\n", ocs);
+ "OCS error from controller = %x for tag %d\n",
+ ocs, lrbp->task_tag);
+ ufshcd_print_host_regs(hba);
break;
} /* end of switch */

@@ -4592,6 +4660,14 @@ static void ufshcd_err_handler(struct work_struct *work)
pm_runtime_put_sync(hba->dev);
}

+static void ufshcd_update_uic_reg_hist(struct ufs_uic_err_reg_hist *reg_hist,
+ u32 reg)
+{
+ reg_hist->reg[reg_hist->pos] = reg;
+ reg_hist->tstamp[reg_hist->pos] = ktime_get();
+ reg_hist->pos = (reg_hist->pos + 1) % UIC_ERR_REG_HIST_LENGTH;
+}
+
/**
* ufshcd_update_uic_error - check and set fatal UIC error flags.
* @hba: per-adapter instance
@@ -4604,15 +4680,20 @@ static void ufshcd_update_uic_error(struct ufs_hba *hba)
reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_PHY_ADAPTER_LAYER);
/* Ignore LINERESET indication, as this is not an error */
if ((reg & UIC_PHY_ADAPTER_LAYER_ERROR) &&
- (reg & UIC_PHY_ADAPTER_LAYER_LANE_ERR_MASK))
+ (reg & UIC_PHY_ADAPTER_LAYER_LANE_ERR_MASK)) {
/*
* To know whether this error is fatal or not, DB timeout
* must be checked but this error is handled separately.
*/
dev_dbg(hba->dev, "%s: UIC Lane error reported\n", __func__);
+ ufshcd_update_uic_reg_hist(&hba->ufs_stats.pa_err, reg);
+ }

/* PA_INIT_ERROR is fatal and needs UIC reset */
reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_DATA_LINK_LAYER);
+ if (reg)
+ ufshcd_update_uic_reg_hist(&hba->ufs_stats.dl_err, reg);
+
if (reg & UIC_DATA_LINK_LAYER_ERROR_PA_INIT)
hba->uic_error |= UFSHCD_UIC_DL_PA_INIT_ERROR;
else if (hba->dev_quirks &
@@ -4626,16 +4707,22 @@ static void ufshcd_update_uic_error(struct ufs_hba *hba)

/* UIC NL/TL/DME errors needs software retry */
reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_NETWORK_LAYER);
- if (reg)
+ if (reg) {
+ ufshcd_update_uic_reg_hist(&hba->ufs_stats.nl_err, reg);
hba->uic_error |= UFSHCD_UIC_NL_ERROR;
+ }

reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_TRANSPORT_LAYER);
- if (reg)
+ if (reg) {
+ ufshcd_update_uic_reg_hist(&hba->ufs_stats.tl_err, reg);
hba->uic_error |= UFSHCD_UIC_TL_ERROR;
+ }

reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_DME);
- if (reg)
+ if (reg) {
+ ufshcd_update_uic_reg_hist(&hba->ufs_stats.dme_err, reg);
hba->uic_error |= UFSHCD_UIC_DME_ERROR;
+ }

dev_dbg(hba->dev, "%s: UIC error flags = 0x%08x\n",
__func__, hba->uic_error);
@@ -5002,12 +5089,16 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
UFS_QUERY_TASK, &resp);
if (!err && resp == UPIU_TASK_MANAGEMENT_FUNC_SUCCEEDED) {
/* cmd pending in the device */
+ dev_err(hba->dev, "%s: cmd pending in the device. tag = %d\n",
+ __func__, tag);
break;
} else if (!err && resp == UPIU_TASK_MANAGEMENT_FUNC_COMPL) {
/*
* cmd not pending in the device, check if it is
* in transition.
*/
+ dev_err(hba->dev, "%s: cmd at tag %d not pending in the device.\n",
+ __func__, tag);
reg = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
if (reg & (1 << tag)) {
/* sleep for max. 200us to stabilize */
@@ -5015,8 +5106,13 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
continue;
}
/* command completed already */
+ dev_err(hba->dev, "%s: cmd at tag %d successfully cleared from DB.\n",
+ __func__, tag);
goto out;
} else {
+ dev_err(hba->dev,
+ "%s: no response from device. tag = %d, err %d\n",
+ __func__, tag, err);
if (!err)
err = resp; /* service response error */
goto out;
@@ -5031,14 +5127,20 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
err = ufshcd_issue_tm_cmd(hba, lrbp->lun, lrbp->task_tag,
UFS_ABORT_TASK, &resp);
if (err || resp != UPIU_TASK_MANAGEMENT_FUNC_COMPL) {
- if (!err)
+ if (!err) {
err = resp; /* service response error */
+ dev_err(hba->dev, "%s: issued. tag = %d, err %d\n",
+ __func__, tag, err);
+ }
goto out;
}

err = ufshcd_clear_cmd(hba, tag);
- if (err)
+ if (err) {
+ dev_err(hba->dev, "%s: Failed clearing cmd at tag %d, err %d\n",
+ __func__, tag, err);
goto out;
+ }

scsi_dma_unmap(cmd);

@@ -5620,6 +5722,20 @@ static void ufshcd_tune_unipro_params(struct ufs_hba *hba)
ufshcd_vops_apply_dev_quirks(hba);
}

+static void ufshcd_clear_dbg_ufs_stats(struct ufs_hba *hba)
+{
+ int err_reg_hist_size = sizeof(struct ufs_uic_err_reg_hist);
+
+ hba->ufs_stats.hibern8_exit_cnt = 0;
+ hba->ufs_stats.last_hibern8_exit_tstamp = ktime_set(0, 0);
+
+ memset(&hba->ufs_stats.pa_err, 0, err_reg_hist_size);
+ memset(&hba->ufs_stats.dl_err, 0, err_reg_hist_size);
+ memset(&hba->ufs_stats.nl_err, 0, err_reg_hist_size);
+ memset(&hba->ufs_stats.tl_err, 0, err_reg_hist_size);
+ memset(&hba->ufs_stats.dme_err, 0, err_reg_hist_size);
+}
+
/**
* ufshcd_probe_hba - probe hba to detect device and initialize
* @hba: per-adapter instance
@@ -5639,6 +5755,9 @@ static int ufshcd_probe_hba(struct ufs_hba *hba)
hba->urgent_bkops_lvl = BKOPS_STATUS_PERF_IMPACT;
hba->is_urgent_bkops_lvl_checked = false;

+ /* Debug counters initialization */
+ ufshcd_clear_dbg_ufs_stats(hba);
+
/* UniPro link is active now */
ufshcd_set_link_active(hba);

diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 05d2055..5189242 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -152,6 +152,10 @@ struct ufs_pm_lvl_states {
* @ucd_req_ptr: UCD address of the command
* @ucd_rsp_ptr: Response UPIU address for this command
* @ucd_prdt_ptr: PRDT address of the command
+ * @utrd_dma_addr: UTRD dma address for debug
+ * @ucd_prdt_dma_addr: PRDT dma address for debug
+ * @ucd_rsp_dma_addr: UPIU response dma address for debug
+ * @ucd_req_dma_addr: UPIU request dma address for debug
* @cmd: pointer to SCSI command
* @sense_buffer: pointer to sense buffer address of the SCSI command
* @sense_bufflen: Length of the sense buffer
@@ -160,6 +164,7 @@ struct ufs_pm_lvl_states {
* @task_tag: Task tag of the command
* @lun: LUN of the command
* @intr_cmd: Interrupt command (doesn't participate in interrupt aggregation)
+ * @issue_time_stamp: time stamp for debug purposes
*/
struct ufshcd_lrb {
struct utp_transfer_req_desc *utr_descriptor_ptr;
@@ -167,6 +172,11 @@ struct ufshcd_lrb {
struct utp_upiu_rsp *ucd_rsp_ptr;
struct ufshcd_sg_entry *ucd_prdt_ptr;

+ dma_addr_t utrd_dma_addr;
+ dma_addr_t ucd_req_dma_addr;
+ dma_addr_t ucd_rsp_dma_addr;
+ dma_addr_t ucd_prdt_dma_addr;
+
struct scsi_cmnd *cmd;
u8 *sense_buffer;
unsigned int sense_bufflen;
@@ -176,6 +186,7 @@ struct ufshcd_lrb {
int task_tag;
u8 lun; /* UPIU LUN id field is only 8-bit wide */
bool intr_cmd;
+ ktime_t issue_time_stamp;
};

/**
@@ -355,6 +366,41 @@ struct ufs_init_prefetch {
u32 icc_level;
};

+#define UIC_ERR_REG_HIST_LENGTH 8
+/**
+ * struct ufs_uic_err_reg_hist - keeps history of uic errors
+ * @pos: index to indicate cyclic buffer position
+ * @reg: cyclic buffer for registers value
+ * @tstamp: cyclic buffer for time stamp
+ */
+struct ufs_uic_err_reg_hist {
+ int pos;
+ u32 reg[UIC_ERR_REG_HIST_LENGTH];
+ ktime_t tstamp[UIC_ERR_REG_HIST_LENGTH];
+};
+
+/**
+ * struct ufs_stats - keeps usage/err statistics
+ * @hibern8_exit_cnt: Counter to keep track of number of exits,
+ * reset this after link-startup.
+ * @last_hibern8_exit_tstamp: Set time after the hibern8 exit.
+ * Clear after the first successful command completion.
+ * @pa_err: tracks pa-uic errors
+ * @dl_err: tracks dl-uic errors
+ * @nl_err: tracks nl-uic errors
+ * @tl_err: tracks tl-uic errors
+ * @dme_err: tracks dme errors
+ */
+struct ufs_stats {
+ u32 hibern8_exit_cnt;
+ ktime_t last_hibern8_exit_tstamp;
+ struct ufs_uic_err_reg_hist pa_err;
+ struct ufs_uic_err_reg_hist dl_err;
+ struct ufs_uic_err_reg_hist nl_err;
+ struct ufs_uic_err_reg_hist tl_err;
+ struct ufs_uic_err_reg_hist dme_err;
+};
+
/**
* struct ufs_hba - per adapter private structure
* @mmio_base: UFSHCI base register address
@@ -531,6 +577,7 @@ struct ufs_hba {
u32 uic_error;
u32 saved_err;
u32 saved_uic_err;
+ struct ufs_stats ufs_stats;

/* Device management request data */
struct ufs_dev_cmd dev_cmd;
--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project