Re: 2.6.31-rc5 regression: hd don't show up

From: Tejun Heo
Date: Mon Aug 31 2009 - 03:39:27 EST


Tim Blechmann wrote:
> booting the machine today, one hd is missing again ... bootlog attached

Hmmm... strange. I don't really see how it could be escaping. Can
you please apply the attached patch? It won't change the behavior,
but the debug messages it adds should show where it's escaping.

Thanks.

--
tejun
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 98af50f..6edd2c4 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -3656,12 +3656,19 @@ int ata_wait_ready(struct ata_link *link, unsigned long deadline,
!ata_link_offline(link) &&
time_before(now, nodev_deadline))
ready = 0;
+ else
+ ata_link_printk(link, KERN_INFO, "XXX wait_ready ready=%d link_offline=%d\n",
+ ready, ata_link_offline(link));
}

- if (ready)
+ if (ready) {
+ ata_link_printk(link, KERN_INFO, "XXX wait_read returning %d\n", ready);
return ready;
- if (time_after(now, deadline))
+ }
+ if (time_after(now, deadline)) {
+ ata_link_printk(link, KERN_INFO, "XXX wait_read returning -EBUSY\n");
return -EBUSY;
+ }

if (!warned && time_after(now, start + 5 * HZ) &&
(deadline - now > 3 * HZ)) {
@@ -3792,6 +3799,7 @@ int sata_link_resume(struct ata_link *link, const unsigned long *params,

scontrol = (scontrol & 0x0f0) | 0x300;

+ ata_link_printk(link, KERN_INFO, "XXX bringing up link\n");
if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol)))
return rc;

@@ -3800,7 +3808,9 @@ int sata_link_resume(struct ata_link *link, const unsigned long *params,
*/
msleep(200);

- if ((rc = sata_link_debounce(link, params, deadline)))
+ rc = sata_link_debounce(link, params, deadline);
+ ata_link_printk(link, KERN_INFO, "XXX debounced rc=%d\n", rc);
+ if (rc)
return rc;

/* clear SError, some PHYs require this even for SRST to work */
@@ -3926,12 +3936,16 @@ int sata_link_hardreset(struct ata_link *link, const unsigned long *timing,
if (rc)
goto out;
/* if link is offline nothing more to do */
- if (ata_phys_link_offline(link))
+ if (ata_phys_link_offline(link)) {
+ ata_link_printk(link, KERN_INFO, "XXX phys link offline\n");
goto out;
+ }

/* Link is online. From this point, -ENODEV too is an error. */
- if (online)
+ if (online) {
+ ata_link_printk(link, KERN_INFO, "XXX setting online\n");
*online = true;
+ }

if (sata_pmp_supported(link->ap) && ata_is_host_link(link)) {
/* If PMP is supported, we have to do follow-up SRST.
@@ -3964,6 +3978,8 @@ int sata_link_hardreset(struct ata_link *link, const unsigned long *timing,
"COMRESET failed (errno=%d)\n", rc);
}
DPRINTK("EXIT, rc=%d\n", rc);
+ ata_link_printk(link, KERN_INFO, "XXX link_hardreset online=%d returning %d\n",
+ online ? *online : -1, rc);
return rc;
}

@@ -6082,7 +6098,7 @@ static void async_port_probe(void *data, async_cookie_t cookie)

ehi->probe_mask |= ATA_ALL_DEVICES;
ehi->action |= ATA_EH_RESET | ATA_EH_LPM;
- ehi->flags |= ATA_EHI_NO_AUTOPSY | ATA_EHI_QUIET;
+ ehi->flags |= ATA_EHI_NO_AUTOPSY/* | ATA_EHI_QUIET*/;

ap->pflags &= ~ATA_PFLAG_INITIALIZING;
ap->pflags |= ATA_PFLAG_LOADING;
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index bbbb1fa..468b5b9 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -1998,6 +1998,9 @@ unsigned int ata_sff_dev_classify(struct ata_device *dev, int present,
if (r_err)
*r_err = err;

+ ata_dev_printk(dev, KERN_INFO, "XXX CLASSIFY TF %02x/%02x:%02x:%02x:%02x\n",
+ tf.command, tf.feature, tf.lbal, tf.lbam, tf.lbah);
+
/* see if device passed diags: continue and warn later */
if (err == 0)
/* diagnostic fail : do nothing _YET_ */
@@ -2006,11 +2009,14 @@ unsigned int ata_sff_dev_classify(struct ata_device *dev, int present,
/* do nothing */ ;
else if ((dev->devno == 0) && (err == 0x81))
/* do nothing */ ;
- else
+ else {
+ ata_dev_printk(dev, KERN_INFO, "XXX diag nodev\n");
return ATA_DEV_NONE;
+ }

/* determine if device is ATA or ATAPI */
class = ata_dev_classify(&tf);
+ ata_dev_printk(dev, KERN_INFO, "XXX ata_dev_classify=%d\n", class);

if (class == ATA_DEV_UNKNOWN) {
/* If the device failed diagnostic, it's likely to
@@ -2019,13 +2025,18 @@ unsigned int ata_sff_dev_classify(struct ata_device *dev, int present,
* device signature is invalid with diagnostic
* failure.
*/
- if (present && (dev->horkage & ATA_HORKAGE_DIAGNOSTIC))
+ if (present && (dev->horkage & ATA_HORKAGE_DIAGNOSTIC)) {
+ ata_dev_printk(dev, KERN_INFO, "XXX UNK && present -> ATA\n");
class = ATA_DEV_ATA;
- else
+ } else {
class = ATA_DEV_NONE;
+ ata_dev_printk(dev, KERN_INFO, "XXX UNK && !present -> NONE\n");
+ }
} else if ((class == ATA_DEV_ATA) &&
- (ap->ops->sff_check_status(ap) == 0))
+ (ap->ops->sff_check_status(ap) == 0)) {
class = ATA_DEV_NONE;
+ ata_dev_printk(dev, KERN_INFO, "XXX stat==0 -> NONE\n");
+ }

return class;
}
@@ -2211,6 +2222,8 @@ int sata_sff_hardreset(struct ata_link *link, unsigned int *class,
*class = ata_sff_dev_classify(link->device, 1, NULL);

DPRINTK("EXIT, class=%u\n", *class);
+ ata_link_printk(link, KERN_INFO, "XXX sff_hardreset rc=%d online=%d *class=%d\n",
+ rc, online, *class);
return rc;
}
EXPORT_SYMBOL_GPL(sata_sff_hardreset);