[RFC][PATCH] scsi: Detailed I/O errors

From: Hannes Reinecke
Date: Mon Aug 30 2010 - 10:21:10 EST


Instead of just passing 'EIO' for any I/O errors we should be
notifying the upper layers with some more details about the cause
of this error.
This patch updates the possible I/O errors to:

- ENOLINK: Link failure between host and target
- EIO: Retryable I/O error
- EREMOTEIO: Non-retryable I/O error

'Retryable' in this context means that an I/O error _might_ be
restricted to the I_T_L nexus (vulgo: path), so retrying on another
nexus / path might succeed.

Signed-off-by: Hannes Reinecke <hare@xxxxxxx>

diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 487ecda..d49b375 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1270,7 +1270,7 @@ static int do_end_io(struct multipath *m, struct request *clone,
if (!error && !clone->errors)
return 0; /* I/O complete */

- if (error == -EOPNOTSUPP)
+ if (error == -EOPNOTSUPP || error == -EREMOTEIO)
return error;

if (clone->cmd_flags & REQ_DISCARD)
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index ce089df..5da040b 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -223,7 +223,7 @@ static inline void scsi_eh_prt_fail_stats(struct Scsi_Host *shost,
* @scmd: Cmd to have sense checked.
*
* Return value:
- * SUCCESS or FAILED or NEEDS_RETRY
+ * SUCCESS or FAILED or NEEDS_RETRY or TARGET_ERROR
*
* Notes:
* When a deferred error is detected the current command has
@@ -338,25 +338,25 @@ static int scsi_check_sense(struct scsi_cmnd *scmd)
case COPY_ABORTED:
case VOLUME_OVERFLOW:
case MISCOMPARE:
- return SUCCESS;
+ case DATA_PROTECT:
+ case BLANK_CHECK:
+ return TARGET_ERROR;

case MEDIUM_ERROR:
if (sshdr.asc == 0x11 || /* UNRECOVERED READ ERR */
sshdr.asc == 0x13 || /* AMNF DATA FIELD */
sshdr.asc == 0x14) { /* RECORD NOT FOUND */
- return SUCCESS;
+ return TARGET_ERROR;
}
return NEEDS_RETRY;

case HARDWARE_ERROR:
if (scmd->device->retry_hwerror)
return ADD_TO_MLQUEUE;
- else
- return SUCCESS;
-
+ else {
+ return TARGET_ERROR;
+ }
case ILLEGAL_REQUEST:
- case BLANK_CHECK:
- case DATA_PROTECT:
default:
return SUCCESS;
}
@@ -819,6 +819,7 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, unsigned char *cmnd,
case SUCCESS:
case NEEDS_RETRY:
case FAILED:
+ case TARGET_ERROR:
break;
case ADD_TO_MLQUEUE:
rtn = NEEDS_RETRY;
@@ -1512,6 +1513,12 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)
rtn = scsi_check_sense(scmd);
if (rtn == NEEDS_RETRY)
goto maybe_retry;
+ if (rtn == TARGET_ERROR) {
+ /* Need to modify host byte to signal a
+ * permanent target failure */
+ scmd->result |= (DID_TARGET_FAILURE << 16);
+ rtn = SUCCESS;
+ }
/* if rtn == FAILED, we have no sense information;
* returning FAILED will wake the error handler thread
* to collect the sense and redo the decide
@@ -1529,6 +1536,7 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)
case RESERVATION_CONFLICT:
SCSI_LOG_ERROR_RECOVERY(3, sdev_printk(KERN_INFO, scmd->device,
"reservation conflict\n"));
+ scmd->result |= (DID_TARGET_FAILURE << 16);
return SUCCESS; /* causes immediate i/o error */
default:
return FAILED;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 9ade720..fb841e3 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -736,8 +736,20 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
memcpy(req->sense, cmd->sense_buffer, len);
req->sense_len = len;
}
- if (!sense_deferred)
- error = -EIO;
+ if (!sense_deferred) {
+ switch(host_byte(result)) {
+ case DID_TRANSPORT_FAILFAST:
+ error = -ENOLINK;
+ break;
+ case DID_TARGET_FAILURE:
+ cmd->result |= (DID_OK << 16);
+ error = -EREMOTEIO;
+ break;
+ default:
+ error = -EIO;
+ break;
+ }
+ }
}

req->resid_len = scsi_get_resid(cmd);
@@ -796,7 +808,18 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
if (scsi_end_request(cmd, error, good_bytes, result == 0) == NULL)
return;

- error = -EIO;
+ switch (host_byte(result)) {
+ case DID_TRANSPORT_FAILFAST:
+ error = -ENOLINK;
+ break;
+ case DID_TARGET_FAILURE:
+ cmd->result |= (DID_OK << 16);
+ error = -EREMOTEIO;
+ break;
+ default:
+ error = -EIO;
+ break;
+ }

if (host_byte(result) == DID_RESET) {
/* Third party bus reset or reset for error recovery
@@ -1418,7 +1441,6 @@ static void scsi_softirq_done(struct request *rq)
wait_for/HZ);
disposition = SUCCESS;
}
-
scsi_log_completion(cmd, disposition);

switch (disposition) {
diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h
index 8fcb6e0..abfee76 100644
--- a/include/scsi/scsi.h
+++ b/include/scsi/scsi.h
@@ -397,6 +397,8 @@ static inline int scsi_is_wlun(unsigned int lun)
* recover the link. Transport class will
* retry or fail IO */
#define DID_TRANSPORT_FAILFAST 0x0f /* Transport class fastfailed the io */
+#define DID_TARGET_FAILURE 0x10 /* Permanent target failure, do not retry on
+ * other paths */
#define DRIVER_OK 0x00 /* Driver status */

/*
@@ -426,6 +428,7 @@ static inline int scsi_is_wlun(unsigned int lun)
#define TIMEOUT_ERROR 0x2007
#define SCSI_RETURN_NOT_HANDLED 0x2008
#define FAST_IO_FAIL 0x2009
+#define TARGET_ERROR 0x200A

/*
* Midlevel queue return values.

--------------080001040105040309060106--
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/