This patch is a first try at implementing proper IDE power management.
It's not intended to be merged as-is, there is some uglyness here or
there, it's here for comments and discussion.
The point is to pipe the power management requests through the request
queue for proper locking. Since those requests involve several
operations that have to be tied together with the queue beeing locked
for further 'user' requests, they are implemented as a state machine
with specific callbacks in the subdrivers.
The guts of the patch is the core changes. I did the actual
implementation for ide-disk & ide-cd as a way to quickly validate it (it
works), but I'm sure we want to do more than just what I implemented
here. At least it's ok for PPC, but I beleive some x86 will want you to
do more (Alan suggested reverting to PIO for example, to please some
BIOSes, though that would suck badly for suspend-to-disk, there may be
more involved regarding recovering from errors in flush too).
One thing that should probably be cleaned up is the difference between
the suspend and the resume request. I didn't want to implement 2
different request bits to avoid using too much of that bit-space, and
because most of the core handling is the same. So right now, I carry in
the special structure attached to the request, 2 fields. An int
indicating if we are doing a suspend or a resume op, and an int that is
the actual state machine step.
However, for convenience, my ide-cd and ide-disk implementation
implement resume just as different step number in the same routine...
So we could either get rid of the "int suspend" field completely and
just define 2 different ranges for "step". Or we could keep "suspend",
but then it may make sense to split the sub-driver ops in 2
(suspend/suspend_completion & resume/resume_completion).
Waiting for feedback...
Ben.
===== drivers/ide/ide-cd.c 1.43 vs edited =====
--- 1.43/drivers/ide/ide-cd.c Sat Apr 19 23:18:27 2003
+++ edited/drivers/ide/ide-cd.c Thu Apr 24 14:19:32 2003
@@ -3242,6 +3242,46 @@
static int ide_cdrom_attach (ide_drive_t *drive);
+static void ide_cdrom_complete_power_step (ide_drive_t *drive, ide_power_state_t *state, u8 stat, u8 error)
+{
+}
+
+/* Power Management state machine.
+ *
+ * We don't do much for CDs right now
+ */
+static ide_startstop_t ide_cdrom_start_power_step (ide_drive_t *drive, ide_power_state_t *state)
+{
+ ide_task_t args;
+
+ memset(&args, 0, sizeof(ide_task_t));
+
+ if (state->step == 0) {
+ state->step = state->suspend ? 1 : 101;
+ return ide_stopped;
+ }
+ switch(state->step) {
+ case 1:
+ break;
+
+ case 101: /* Resume step 1 (restore DMA) */
+ /* Right now, all we do is call ide_dma_check for the HWIF,
+ * we could be smarter and check for current xfer_speed
+ * in struct drive etc...
+ * Also, this step could be implemented as a generic helper
+ * as most subdrivers will use it
+ */
+ if (!drive->id || !(drive->id->capability & 1))
+ break;
+ if (HWIF(drive)->ide_dma_check == NULL)
+ break;
+ HWIF(drive)->ide_dma_check(drive);
+ break;
+ }
+ state->step = ide_power_state_completed;
+ return ide_stopped;
+}
+
static ide_driver_t ide_cdrom_driver = {
.owner = THIS_MODULE,
.name = "ide-cdrom",
@@ -3258,6 +3298,12 @@
.capacity = ide_cdrom_capacity,
.attach = ide_cdrom_attach,
.drives = LIST_HEAD_INIT(ide_cdrom_driver.drives),
+ .start_power_step = ide_cdrom_start_power_step,
+ .complete_power_step = ide_cdrom_complete_power_step,
+ .gen_driver = {
+ .suspend = generic_ide_suspend,
+ .resume = generic_ide_resume,
+ }
};
static int idecd_open(struct inode * inode, struct file * file)
===== drivers/ide/ide-disk.c 1.38 vs edited =====
--- 1.38/drivers/ide/ide-disk.c Mon Apr 21 12:32:44 2003
+++ edited/drivers/ide/ide-disk.c Thu Apr 24 14:37:22 2003
@@ -72,8 +72,6 @@
#include "legacy/pdc4030.h"
-static int driver_blocked;
-
static inline u32 idedisk_read_24 (ide_drive_t *drive)
{
u8 hcyl = HWIF(drive)->INB(IDE_HCYL_REG);
@@ -372,9 +370,6 @@
nsectors.all = (u16) rq->nr_sectors;
- if (driver_blocked)
- panic("Request while ide driver is blocked?");
-
#if defined(CONFIG_BLK_DEV_PDC4030) || defined(CONFIG_BLK_DEV_PDC4030_MODULE)
if (IS_PDC4030_DRIVE)
return promise_rw_disk(drive, rq, block);
@@ -1398,21 +1393,6 @@
return 0;
}
-static int call_idedisk_standby (ide_drive_t *drive, int arg)
-{
- ide_task_t args;
- u8 standby = (arg) ? WIN_STANDBYNOW2 : WIN_STANDBYNOW1;
- memset(&args, 0, sizeof(ide_task_t));
- args.tfRegister[IDE_COMMAND_OFFSET] = standby;
- args.command_type = ide_cmd_type_parser(&args);
- return ide_raw_taskfile(drive, &args, NULL);
-}
-
-static int do_idedisk_standby (ide_drive_t *drive)
-{
- return call_idedisk_standby(drive, 0);
-}
-
static int do_idedisk_flushcache (ide_drive_t *drive)
{
ide_task_t args;
@@ -1511,37 +1491,66 @@
#endif
}
-static int idedisk_suspend(struct device *dev, u32 state, u32 level)
-{
- ide_drive_t *drive = dev->driver_data;
-
- printk("Suspending device %p\n", dev->driver_data);
-
- /* I hope that every freeze operation from the upper levels have
- * already been done...
- */
-
- if (level != SUSPEND_SAVE_STATE)
- return 0;
-
- /* set the drive to standby */
- printk(KERN_INFO "suspending: %s ", drive->name);
- do_idedisk_standby(drive);
- drive->blocked = 1;
- BUG_ON (HWGROUP(drive)->handler);
- return 0;
+static void idedisk_complete_power_step (ide_drive_t *drive, ide_power_state_t *state, u8 stat, u8 error)
+{
+ switch(state->step) {
+ case 1: /* Suspend step 1 (flush cache) complete */
+ state->step = 2;
+ break;
+ case 2: /* Suspend step 2 (standby) complete */
+ state->step = ide_power_state_completed;
+ break;
+ }
}
-static int idedisk_resume(struct device *dev, u32 level)
+/* Power Management state machine. This one is rather trivial for now,
+ * we should probably add more, like switching back to PIO on suspend
+ * to help some BIOSes, re-do the door locking on resume, etc...
+ */
+static ide_startstop_t idedisk_start_power_step (ide_drive_t *drive, ide_power_state_t *state)
{
- ide_drive_t *drive = dev->driver_data;
+ ide_task_t args;
- if (level != RESUME_RESTORE_STATE)
- return 0;
- BUG_ON(!drive->blocked);
- drive->blocked = 0;
- return 0;
+ memset(&args, 0, sizeof(ide_task_t));
+
+ if (state->step == 0) {
+ state->step = state->suspend ? 1 : 101;
+ return ide_stopped;
+ }
+ switch(state->step) {
+ case 1: /* Suspend step 1 (flush cache) */
+ if (!drive->wcache) {
+ idedisk_complete_power_step(drive, state, 0, 0);
+ return ide_stopped;
+ }
+ if (drive->id->cfs_enable_2 & 0x2400)
+ args.tfRegister[IDE_COMMAND_OFFSET] = WIN_FLUSH_CACHE_EXT;
+ else
+ args.tfRegister[IDE_COMMAND_OFFSET] = WIN_FLUSH_CACHE;
+ args.command_type = ide_cmd_type_parser(&args);
+ return do_rw_taskfile(drive, &args);
+ case 2: /* Suspend step 2 (standby) */
+ args.tfRegister[IDE_COMMAND_OFFSET] = WIN_STANDBYNOW1;
+ args.command_type = ide_cmd_type_parser(&args);
+ return do_rw_taskfile(drive, &args);
+
+ case 101: /* Resume step 1 (restore DMA) */
+ /* Right now, all we do is call ide_dma_check for the HWIF,
+ * we could be smarter and check for current xfer_speed
+ * in struct drive etc...
+ * Also, this step could be implemented as a generic helper
+ * as most subdrivers will use it
+ */
+ if (!drive->id || !(drive->id->capability & 1))
+ break;
+ if (HWIF(drive)->ide_dma_check == NULL)
+ break;
+ HWIF(drive)->ide_dma_check(drive);
+ break;
+ }
+ state->step = ide_power_state_completed;
+ return ide_stopped;
}
static void idedisk_setup (ide_drive_t *drive)
@@ -1680,9 +1689,11 @@
.proc = idedisk_proc,
.attach = idedisk_attach,
.drives = LIST_HEAD_INIT(idedisk_driver.drives),
+ .start_power_step = idedisk_start_power_step,
+ .complete_power_step = idedisk_complete_power_step,
.gen_driver = {
- .suspend = idedisk_suspend,
- .resume = idedisk_resume,
+ .suspend = generic_ide_suspend,
+ .resume = generic_ide_resume,
}
};
===== drivers/ide/ide-io.c 1.8 vs edited =====
--- 1.8/drivers/ide/ide-io.c Sun Apr 20 23:14:28 2003
+++ edited/drivers/ide/ide-io.c Thu Apr 24 14:36:57 2003
@@ -139,6 +139,39 @@
EXPORT_SYMBOL(ide_end_request);
/**
+ * ide_complete_pm_request - end the current Power Management request
+ * @drive: target drive
+ * @rq: request
+ *
+ * This function cleans up the current PM request and stops the queue
+ * if necessary.
+ */
+
+static void ide_complete_pm_request (ide_drive_t *drive, struct request *rq)
+{
+ ide_power_state_t *state = (ide_power_state_t *)rq->special;
+ unsigned long flags;
+ int suspend = state->suspend;
+
+#ifdef DEBUG_PM
+ printk("%s: completing PM request, suspend: %d\n", drive->name, state->suspend);
+#endif
+ spin_lock_irqsave(&ide_lock, flags);
+ if (suspend)
+ __blk_stop_queue(&drive->queue);
+ else
+ drive->blocked = 0;
+ blkdev_dequeue_request(rq);
+ HWGROUP(drive)->rq = NULL;
+ end_that_request_last(rq);
+ spin_unlock_irqrestore(&ide_lock, flags);
+ /* Hrm... there is no __blk_start_queue... */
+ if (!suspend)
+ blk_start_queue(&drive->queue);
+}
+
+
+/**
* ide_end_drive_cmd - end an explicit drive command
* @drive: command
* @stat: status bits
@@ -214,6 +247,17 @@
args->hobRegister[IDE_HCYL_OFFSET_HOB] = hwif->INB(IDE_HCYL_REG);
}
}
+ } else if (rq->flags & REQ_POWER_MANAGEMENT) {
+ ide_power_state_t *state = (ide_power_state_t *)rq->special;
+
+#ifdef DEBUG_PM
+ printk("%s: complete_power_step(susp: %d, step: %d, stat: %x, err: %x)\n",
+ drive->name, state->suspend, state->step, stat, err);
+#endif
+ DRIVER(drive)->complete_power_step(drive, state, stat, err);
+ if (state->step == ide_power_state_completed)
+ ide_complete_pm_request(drive, rq);
+ return;
}
spin_lock_irqsave(&ide_lock, flags);
@@ -615,7 +659,37 @@
while ((read_timer() - HWIF(drive)->last_time) < DISK_RECOVERY_TIME);
#endif
- SELECT_DRIVE(drive);
+ if (rq->flags & REQ_POWER_MANAGEMENT) {
+ ide_power_state_t *state = (ide_power_state_t *)rq->special;
+ int rc;
+
+ /* Mark drive blocked when starting the suspend sequence */
+ if (state->suspend && state->step == 0)
+ drive->blocked = 1;
+ /* If this is a power management wakeup request, the first thing
+ * we do is to wait for BSY bit to go away (with a looong timeout)
+ * as a drive on this hwif may just be POSTing itself. We do that
+ * before even selecting as the "other" device on the bus may be
+ * broken enough to walk on our toes at this point.
+ */
+ if (state->suspend != 0 || state->step != 0)
+ goto dont_wait;
+#ifdef DEBUG_PM
+ printk("%s: Wakeup request inited, waiting for !BSY...\n", drive->name);
+#endif
+ rc = ide_wait_not_busy(HWIF(drive), 35000);
+ if (rc)
+ printk(KERN_WARNING "%s: bus not ready on wakeup\n", drive->name);
+ SELECT_DRIVE(drive);
+ HWIF(drive)->OUTB(8, HWIF(drive)->io_ports[IDE_CONTROL_OFFSET]);
+ rc = ide_wait_not_busy(HWIF(drive), 10000);
+ if (rc)
+ printk(KERN_WARNING "%s: drive not ready on wakeup\n", drive->name);
+ } else {
+dont_wait:
+ SELECT_DRIVE(drive);
+ }
+
if (ide_wait_stat(&startstop, drive, drive->ready_stat, BUSY_STAT|DRQ_STAT, WAIT_READY)) {
printk(KERN_ERR "%s: drive not ready for command\n", drive->name);
return startstop;
@@ -625,6 +699,19 @@
return execute_drive_cmd(drive, rq);
else if (rq->flags & REQ_DRIVE_TASKFILE)
return execute_drive_cmd(drive, rq);
+ else if (rq->flags & REQ_POWER_MANAGEMENT) {
+ ide_power_state_t *state = (ide_power_state_t *)rq->special;
+
+#ifdef DEBUG_PM
+ printk("%s: start_power_step(susp: %d, step: %d)\n", drive->name, state->suspend, state->step);
+#endif
+ startstop = DRIVER(drive)->start_power_step(drive, state);
+ if (startstop == ide_stopped && state->step == ide_power_state_completed) {
+ ide_complete_pm_request(drive, rq);
+ return ide_stopped;
+ } else
+ return startstop;
+ }
return (DRIVER(drive)->do_request(drive, rq, block));
}
return do_special(drive);
@@ -835,6 +922,22 @@
if (!rq) {
hwgroup->busy = !!ata_pending_commands(drive);
break;
+ }
+
+ /* Sanity: don't accept a request that isn't a PM request
+ * if we are currently power managed. This is very important as
+ * blk_stop_queue() doesn't prevent the elv_next_request() above
+ * to return us whatever is in the queue. Since we call ide_do_request()
+ * ourselves, we end up taking requests while the queue is blocked...
+ */
+ if (drive->blocked && !(rq->flags & REQ_POWER_MANAGEMENT)) {
+#ifdef DEBUG_PM
+ printk("%s: a request made it's way while we are power managing...\n", drive->name);
+#endif
+ /* We clear busy, there should be no pending ATA command at this point
+ */
+ hwgroup->busy = 0;
+ break;
}
if (!rq->bio && ata_pending_commands(drive))
===== drivers/ide/ide-iops.c 1.15 vs edited =====
--- 1.15/drivers/ide/ide-iops.c Wed Mar 26 21:05:24 2003
+++ edited/drivers/ide/ide-iops.c Thu Apr 24 14:32:28 2003
@@ -1318,5 +1318,32 @@
return do_reset1(drive, 0);
}
+/*
+ * This function waits for the hwif to report a non-busy status
+ * see comments in probe_hwif()
+ */
+int ide_wait_not_busy(ide_hwif_t *hwif, unsigned long timeout)
+{
+ u8 stat = 0;
+
+ while(timeout--) {
+ /* Turn this into a schedule() sleep once I'm sure
+ * about locking issues (2.5 work ?)
+ */
+ mdelay(1);
+ stat = hwif->INB(hwif->io_ports[IDE_STATUS_OFFSET]);
+ if ((stat & BUSY_STAT) == 0)
+ break;
+ /* Assume a value of 0xff means nothing is connected to
+ * the interface and it doesn't implement the pull-down
+ * resistor on D7
+ */
+ if (stat == 0xff)
+ break;
+ }
+ return ((stat & BUSY_STAT) == 0) ? 0 : -EBUSY;
+}
+
+
EXPORT_SYMBOL(ide_do_reset);
===== drivers/ide/ide-probe.c 1.40 vs edited =====
--- 1.40/drivers/ide/ide-probe.c Fri Apr 18 17:58:55 2003
+++ edited/drivers/ide/ide-probe.c Thu Apr 24 14:32:04 2003
@@ -723,35 +723,6 @@
//EXPORT_SYMBOL(hwif_register);
-/* Enable code below on all archs later, for now, I want it on PPC
- */
-#ifdef CONFIG_PPC
-/*
- * This function waits for the hwif to report a non-busy status
- * see comments in probe_hwif()
- */
-static int wait_not_busy(ide_hwif_t *hwif, unsigned long timeout)
-{
- u8 stat = 0;
-
- while(timeout--) {
- /* Turn this into a schedule() sleep once I'm sure
- * about locking issues (2.5 work ?)
- */
- mdelay(1);
- stat = hwif->INB(hwif->io_ports[IDE_STATUS_OFFSET]);
- if ((stat & BUSY_STAT) == 0)
- break;
- /* Assume a value of 0xff means nothing is connected to
- * the interface and it doesn't implement the pull-down
- * resistor on D7
- */
- if (stat == 0xff)
- break;
- }
- return ((stat & BUSY_STAT) == 0) ? 0 : -EBUSY;
-}
-
static int wait_hwif_ready(ide_hwif_t *hwif)
{
int rc;
@@ -766,7 +737,7 @@
* I know of at least one disk who takes 31 seconds, I use 35
* here to be safe
*/
- rc = wait_not_busy(hwif, 35000);
+ rc = ide_wait_not_busy(hwif, 35000);
if (rc)
return rc;
@@ -774,20 +745,19 @@
SELECT_DRIVE(&hwif->drives[0]);
hwif->OUTB(8, hwif->io_ports[IDE_CONTROL_OFFSET]);
mdelay(2);
- rc = wait_not_busy(hwif, 10000);
+ rc = ide_wait_not_busy(hwif, 10000);
if (rc)
return rc;
SELECT_DRIVE(&hwif->drives[1]);
hwif->OUTB(8, hwif->io_ports[IDE_CONTROL_OFFSET]);
mdelay(2);
- rc = wait_not_busy(hwif, 10000);
+ rc = ide_wait_not_busy(hwif, 10000);
/* Exit function with master reselected (let's be sane) */
SELECT_DRIVE(&hwif->drives[0]);
return rc;
}
-#endif /* CONFIG_PPC */
/*
* This routine only knows how to look for drive units 0 and 1
===== drivers/ide/ide.c 1.58 vs edited =====
--- 1.58/drivers/ide/ide.c Fri Apr 18 17:57:08 2003
+++ edited/drivers/ide/ide.c Thu Apr 24 14:31:26 2003
@@ -1448,6 +1448,52 @@
EXPORT_SYMBOL(ata_attach);
+int generic_ide_suspend(struct device *dev, u32 state, u32 level)
+{
+ ide_drive_t *drive = dev->driver_data;
+ struct request rq;
+ ide_power_state_t args;
+
+ if (level == dev->power_state || level != SUSPEND_SAVE_STATE)
+ return 0;
+
+ /* The suspend request is a state machine of taskfile requests */
+ memset(&rq, 0, sizeof(rq));
+ memset(&args, 0, sizeof(args));
+ rq.flags = REQ_POWER_MANAGEMENT;
+ rq.special = &args;
+ args.step = 0;
+ args.suspend = 1;
+
+ /* Start it */
+ return ide_do_drive_cmd(drive, &rq, ide_wait);
+}
+
+EXPORT_SYMBOL(generic_ide_suspend);
+
+int generic_ide_resume(struct device *dev, u32 level)
+{
+ ide_drive_t *drive = dev->driver_data;
+ struct request rq;
+ ide_power_state_t args;
+
+ if (level == dev->power_state || level != RESUME_RESTORE_STATE)
+ return 0;
+
+ /* The suspend request is a state machine of taskfile requests */
+ memset(&rq, 0, sizeof(rq));
+ memset(&args, 0, sizeof(args));
+ rq.flags = REQ_POWER_MANAGEMENT;
+ rq.special = &args;
+ args.step = 0;
+ args.suspend = 0;
+
+ /* Start it */
+ return ide_do_drive_cmd(drive, &rq, ide_preempt);
+}
+
+EXPORT_SYMBOL(generic_ide_resume);
+
int generic_ide_ioctl(struct block_device *bdev, unsigned int cmd,
unsigned long arg)
{
===== include/linux/blkdev.h 1.100 vs edited =====
--- 1.100/include/linux/blkdev.h Sun Apr 20 18:20:10 2003
+++ edited/include/linux/blkdev.h Thu Apr 24 14:30:50 2003
@@ -116,6 +116,7 @@
__REQ_DRIVE_CMD,
__REQ_DRIVE_TASK,
__REQ_DRIVE_TASKFILE,
+ __REQ_POWER_MANAGEMENT,
__REQ_NR_BITS, /* stops here */
};
@@ -137,6 +138,7 @@
#define REQ_DRIVE_CMD (1 << __REQ_DRIVE_CMD)
#define REQ_DRIVE_TASK (1 << __REQ_DRIVE_TASK)
#define REQ_DRIVE_TASKFILE (1 << __REQ_DRIVE_TASKFILE)
+#define REQ_POWER_MANAGEMENT (1 << __REQ_POWER_MANAGEMENT)
#include <linux/elevator.h>
===== include/linux/ide.h 1.46 vs edited =====
--- 1.46/include/linux/ide.h Mon Apr 21 01:21:19 2003
+++ edited/include/linux/ide.h Thu Apr 24 14:31:07 2003
@@ -1160,6 +1160,39 @@
#endif
/*
+ * Power Management related definitions
+ */
+typedef struct ide_power_state_s
+{
+ int suspend; /* 1 = suspending, 0 = resuming */
+ int step; /* Step in PM state machine */
+ void* data; /* Any additional data the subdriver wants */
+} ide_power_state_t;
+
+#define ide_power_state_completed -1
+#define ide_power_state_first 0
+//#define ide_power_state_core_first 100
+//#define ide_power_state_core_last 199
+
+/* The step value starts at 0 (ide_power_state_first).
+ *
+ * For each step, the core calls the subdriver start_power_step()
+ * first. This can return
+ * - ide_stopped : In this case, the core calls us back again unless
+ * step have been set to ide_power_state_completed
+ * - ide_started : In this case, the channel is left busy until an
+ * async event (interrupt) occurs.
+ * Typically, start_power_step() will issue a taskfile request with
+ * do_rw_taskfile().
+ *
+ * Upon reception of the interrupt, the core will call complete_power_step()
+ * with the error code if any. This routine should update the step value
+ * and return. It should not start a new request. The core will call
+ * start_power_step for the new step value, unless step have been set to
+ * ide_power_state_completed.
+ */
+
+/*
* Subdrivers support.
*/
#define IDE_SUBDRIVER_VERSION 1
@@ -1188,6 +1221,8 @@
int (*attach)(ide_drive_t *);
void (*ata_prebuilder)(ide_drive_t *);
void (*atapi_prebuilder)(ide_drive_t *);
+ ide_startstop_t (*start_power_step)(ide_drive_t *, ide_power_state_t *state);
+ void (*complete_power_step)(ide_drive_t *, ide_power_state_t *state, u8 stat, u8 error);
struct device_driver gen_driver;
struct list_head drives;
struct list_head drivers;
@@ -1196,6 +1231,8 @@
#define DRIVER(drive) ((drive)->driver)
extern int generic_ide_ioctl(struct block_device *, unsigned, unsigned long);
+extern int generic_ide_suspend(struct device *dev, u32 state, u32 level);
+extern int generic_ide_resume(struct device *dev, u32 level);
/*
* IDE modules.
@@ -1550,6 +1587,8 @@
extern u8 eighty_ninty_three (ide_drive_t *);
extern int set_transfer(ide_drive_t *, ide_task_t *);
extern int taskfile_lib_get_identify(ide_drive_t *drive, u8 *);
+
+extern int ide_wait_not_busy(ide_hwif_t *hwif, unsigned long timeout);
/*
* ide_system_bus_speed() returns what we think is the system VESA/PCI
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
This archive was generated by hypermail 2b29 : Wed Apr 30 2003 - 22:00:13 EST