Re: [PATCH scsi-misc-2.6 02/07] scsi: make scsi_send_eh_cmnd use its own timer instead of scmd->eh_timeout

From: Tejun Heo
Date: Sun Apr 10 2005 - 13:59:39 EST


02_scsi_timer_eh_timer_fix.patch

scmd->eh_timeout is used to resolve the race between command
completion and timeout. However, during error handling,
scsi_send_eh_cmnd uses scmd->eh_timeout. This creates a race
condition between eh and normal completion for a request which
has timed out and in the process of error handling. If the
request completes while scmd->eh_timeout is being used by eh,
eh timeout is lost and the command will be handled by both eh
and completion path. This patch fixes the race by making
scsi_send_eh_cmnd() use its own timer.

Signed-off-by: Tejun Heo <htejun@xxxxxxxxx>

scsi_error.c | 64 ++++++++++++++++++-----------------------------------------
scsi_priv.h | 1
2 files changed, 20 insertions(+), 45 deletions(-)

Index: scsi-reqfn-export/drivers/scsi/scsi_error.c
===================================================================
--- scsi-reqfn-export.orig/drivers/scsi/scsi_error.c 2005-04-11 03:42:11.000000000 +0900
+++ scsi-reqfn-export/drivers/scsi/scsi_error.c 2005-04-11 03:42:11.000000000 +0900
@@ -420,46 +420,12 @@ static int scsi_eh_completed_normally(st
}

/**
- * scsi_eh_times_out - timeout function for error handling.
- * @scmd: Cmd that is timing out.
- *
- * Notes:
- * During error handling, the kernel thread will be sleeping waiting
- * for some action to complete on the device. our only job is to
- * record that it timed out, and to wake up the thread.
- **/
-static void scsi_eh_times_out(struct scsi_cmnd *scmd)
-{
- scsi_eh_eflags_set(scmd, SCSI_EH_REC_TIMEOUT);
- SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd:%p\n", __FUNCTION__,
- scmd));
-
- if (scmd->device->host->eh_action)
- up(scmd->device->host->eh_action);
-}
-
-/**
* scsi_eh_done - Completion function for error handling.
* @scmd: Cmd that is done.
**/
static void scsi_eh_done(struct scsi_cmnd *scmd)
{
- /*
- * if the timeout handler is already running, then just set the
- * flag which says we finished late, and return. we have no
- * way of stopping the timeout handler from running, so we must
- * always defer to it.
- */
- if (del_timer(&scmd->eh_timeout)) {
- scmd->request->rq_status = RQ_SCSI_DONE;
- scmd->owner = SCSI_OWNER_ERROR_HANDLER;
-
- SCSI_LOG_ERROR_RECOVERY(3, printk("%s scmd: %p result: %x\n",
- __FUNCTION__, scmd, scmd->result));
-
- if (scmd->device->host->eh_action)
- up(scmd->device->host->eh_action);
- }
+ up(scmd->device->host->eh_action);
}

/**
@@ -478,6 +444,7 @@ static int scsi_send_eh_cmnd(struct scsi
{
struct scsi_device *sdev = scmd->device;
struct Scsi_Host *shost = sdev->host;
+ struct timer_list timer;
DECLARE_MUTEX_LOCKED(sem);
unsigned long flags;
int rtn = SUCCESS;
@@ -492,7 +459,11 @@ static int scsi_send_eh_cmnd(struct scsi
scmd->cmnd[1] = (scmd->cmnd[1] & 0x1f) |
(sdev->lun << 5 & 0xe0);

- scsi_add_timer(scmd, timeout, scsi_eh_times_out);
+ init_timer(&timer);
+ timer.expires = jiffies + timeout;
+ timer.function = (void (*)(unsigned long))scsi_eh_done;
+ timer.data = (unsigned long)scmd;
+ add_timer(&timer);

/*
* set up the semaphore so we wait for the command to complete.
@@ -508,14 +479,19 @@ static int scsi_send_eh_cmnd(struct scsi
down(&sem);
scsi_log_completion(scmd, SUCCESS);

- shost->eh_action = NULL;
-
- /*
- * see if timeout. if so, tell the host to forget about it.
- * in other words, we don't want a callback any more.
- */
- if (scsi_eh_eflags_chk(scmd, SCSI_EH_REC_TIMEOUT)) {
- scsi_eh_eflags_clr(scmd, SCSI_EH_REC_TIMEOUT);
+ if (del_timer(&timer)) {
+ SCSI_LOG_ERROR_RECOVERY(3,
+ printk("scsi_eh_done scmd: %p result: %x\n",
+ scmd, scmd->result));
+ scmd->request->rq_status = RQ_SCSI_DONE;
+ scmd->owner = SCSI_OWNER_ERROR_HANDLER;
+ } else {
+ /*
+ * Timed out. Tell the host to forget about it. In
+ * other words, we don't want a callback any more.
+ */
+ SCSI_LOG_ERROR_RECOVERY(3,
+ printk("scsi_eh_times_out scmd: %p\n", scmd));
scmd->owner = SCSI_OWNER_LOWLEVEL;

/*
Index: scsi-reqfn-export/drivers/scsi/scsi_priv.h
===================================================================
--- scsi-reqfn-export.orig/drivers/scsi/scsi_priv.h 2005-04-11 03:42:10.000000000 +0900
+++ scsi-reqfn-export/drivers/scsi/scsi_priv.h 2005-04-11 03:42:11.000000000 +0900
@@ -42,7 +42,6 @@ struct Scsi_Host;
(scp->eh_eflags = 0)

#define SCSI_EH_CANCEL_CMD 0x0001 /* Cancel this cmd */
-#define SCSI_EH_REC_TIMEOUT 0x0002 /* EH retry timed out */

#define SCSI_SENSE_VALID(scmd) \
(((scmd)->sense_buffer[0] & 0x70) == 0x70)

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/