diff -ur --exclude-from /home/axboe/cdrom/exclude-from /opt/tmp/linux-2.3.99-pre6-2/drivers/block/DAC960.c linux/drivers/block/DAC960.c --- /opt/tmp/linux-2.3.99-pre6-2/drivers/block/DAC960.c Mon Mar 13 04:32:57 2000 +++ linux/drivers/block/DAC960.c Thu Apr 13 22:25:37 2000 @@ -1334,8 +1334,9 @@ IO_Request_T *Request; DAC960_Command_T *Command; char *RequestBuffer; + request_queue_t *q = &blk_dev[DAC960_MAJOR + Controller->ControllerNumber].request_queue; - queue_head = &blk_dev[DAC960_MAJOR + Controller->ControllerNumber].request_queue.queue_head; + queue_head = &q->queue_head; while (true) { if (list_empty(queue_head)) return false; @@ -1344,9 +1345,9 @@ Command = DAC960_AllocateCommand(Controller); if (Command != NULL) break; if (!WaitForCommand) return false; - spin_unlock(&io_request_lock); + spin_unlock(&q->request_lock); sleep_on(&Controller->CommandWaitQueue); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->request_lock); } DAC960_ClearCommand(Command); if (Request->cmd == READ) @@ -1361,9 +1362,8 @@ Command->SegmentCount = Request->nr_segments; Command->BufferHeader = Request->bh; RequestBuffer = Request->buffer; - Request->rq_status = RQ_INACTIVE; blkdev_dequeue_request(Request); - wake_up(&wait_for_request); + blkdev_release_request(Request); if (Command->SegmentCount == 1) { DAC960_CommandMailbox_T *CommandMailbox = &Command->CommandMailbox; diff -ur --exclude-from /home/axboe/cdrom/exclude-from /opt/tmp/linux-2.3.99-pre6-2/drivers/block/ll_rw_blk.c linux/drivers/block/ll_rw_blk.c --- /opt/tmp/linux-2.3.99-pre6-2/drivers/block/ll_rw_blk.c Thu Apr 13 21:56:00 2000 +++ linux/drivers/block/ll_rw_blk.c Thu Apr 13 22:54:48 2000 @@ -4,6 +4,7 @@ * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 1994, Karl Keyte: Added support for disk statistics * Elevator latency, (C) 2000 Andrea Arcangeli SuSE + * Queue request tables / lock, (C) 2000 Jens Axboe */ /* @@ -37,12 +38,6 @@ #endif /* - * The request-struct contains all necessary data - * to load a nr of sectors into memory - */ -static struct request all_requests[NR_REQUEST]; - -/* * The "disk" task queue is used to start the actual requests * after a plug */ @@ -62,11 +57,6 @@ */ spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED; -/* - * used to wait on when there are no free requests - */ -DECLARE_WAIT_QUEUE_HEAD(wait_for_request); - /* This specifies how many sectors to read ahead on the disk. 
*/ int read_ahead[MAX_BLKDEV]; @@ -226,7 +216,7 @@ { #ifdef CONFIG_BLK_DEV_MD if (MAJOR(dev) == MD_MAJOR) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->request_lock); BUG(); } #endif @@ -237,10 +227,35 @@ queue_task(&q->plug_tq, &tq_disk); } +static void blk_request_table_init(request_queue_t *q) +{ + struct request *rq; + + /* + * Allocate request table for this queue + */ + q->writes_max = (2 * QUEUE_NR_REQUEST) / 3; + + q->request_table = kmalloc(QUEUE_NR_REQUEST * sizeof(struct request), + GFP_KERNEL); + if (q->request_table == NULL) + panic("ll_rw_blk: couldn't alloc mem for request table\n"); + + spin_lock_init(&q->request_lock); + rq = q->request_table + QUEUE_NR_REQUEST; + while (--rq >= q->request_table) { + rq->rq_status = RQ_INACTIVE; + list_add(&rq->table, &q->request_head); + } + init_waitqueue_head(&q->wait_for_request); +} + void blk_init_queue(request_queue_t * q, request_fn_proc * rfn) { INIT_LIST_HEAD(&q->queue_head); + INIT_LIST_HEAD(&q->request_head); elevator_init(&q->elevator); + blk_request_table_init(q); q->request_fn = rfn; q->back_merge_fn = ll_back_merge_fn; q->front_merge_fn = ll_front_merge_fn; @@ -250,6 +265,7 @@ q->plug_tq.routine = &generic_unplug_device; q->plug_tq.data = q; q->plugged = 0; + q->writes_pending = 0; /* * These booleans describe the queue properties. We set the * default (and most common) values here. Other drivers can @@ -268,84 +284,72 @@ request_queue_t * q = (request_queue_t *) data; unsigned long flags; - spin_lock_irqsave(&io_request_lock,flags); + spin_lock_irqsave(&q->request_lock,flags); if (q->plugged) { q->plugged = 0; if (!list_empty(&q->queue_head)) (q->request_fn)(q); } - spin_unlock_irqrestore(&io_request_lock,flags); + spin_unlock_irqrestore(&q->request_lock,flags); } +#define blkdev_free_request(head) list_entry((head)->next, struct request, table) + /* - * look for a free request in the first N entries. - * NOTE: interrupts must be disabled on the way in (on SMP the request queue - * spinlock has to be aquired), and will still be disabled on the way out. + * Get a free request. queue request_lock must be held and interrupts + * disabled on the way in. */ -static inline struct request * get_request(int n, kdev_t dev) +static inline struct request *get_request(request_queue_t *q, kdev_t dev) { - static struct request *prev_found = NULL, *prev_limit = NULL; - register struct request *req, *limit; - - if (n <= 0) - panic("get_request(%d): impossible!\n", n); + struct request *rq = NULL; - limit = all_requests + n; - if (limit != prev_limit) { - prev_limit = limit; - prev_found = all_requests; - } - req = prev_found; - for (;;) { - req = ((req > all_requests) ? req : limit) - 1; - if (req->rq_status == RQ_INACTIVE) - break; - if (req == prev_found) - return NULL; + if (!list_empty(&q->request_head)) { + rq = blkdev_free_request(&q->request_head); + list_del(&rq->table); + rq->rq_status = RQ_ACTIVE; + rq->rq_dev = dev; + rq->special = NULL; } - prev_found = req; - req->rq_status = RQ_ACTIVE; - req->rq_dev = dev; - req->special = NULL; - return req; + + return rq; } /* - * wait until a free request in the first N entries is available. 
+ * Run tq_disk task queue until a free request becomes available */ -static struct request * __get_request_wait(int n, kdev_t dev) +static struct request *__get_request_wait(request_queue_t *q, kdev_t dev) { - register struct request *req; DECLARE_WAITQUEUE(wait, current); unsigned long flags; + struct request *rq; - add_wait_queue_exclusive(&wait_for_request, &wait); + add_wait_queue_exclusive(&q->wait_for_request, &wait); for (;;) { - __set_current_state(TASK_UNINTERRUPTIBLE|TASK_EXCLUSIVE); - spin_lock_irqsave(&io_request_lock,flags); - req = get_request(n, dev); - spin_unlock_irqrestore(&io_request_lock,flags); - if (req) + __set_current_state(TASK_UNINTERRUPTIBLE | TASK_EXCLUSIVE); + spin_lock_irqsave(&q->request_lock, flags); + rq = get_request(q, dev); + spin_unlock_irqrestore(&q->request_lock, flags); + if (rq) break; run_task_queue(&tq_disk); schedule(); } - remove_wait_queue(&wait_for_request, &wait); + remove_wait_queue(&q->wait_for_request, &wait); current->state = TASK_RUNNING; - return req; + return rq; } -static inline struct request * get_request_wait(int n, kdev_t dev) +static inline struct request *get_request_wait(request_queue_t *q, kdev_t dev) { register struct request *req; unsigned long flags; - spin_lock_irqsave(&io_request_lock,flags); - req = get_request(n, dev); - spin_unlock_irqrestore(&io_request_lock,flags); + spin_lock_irqsave(&q->request_lock, flags); + req = get_request(q, dev); + spin_unlock_irqrestore(&q->request_lock, flags); if (req) return req; - return __get_request_wait(n, dev); + return __get_request_wait(q, dev); } /* RO fail safe mechanism */ @@ -454,6 +458,36 @@ } } +request_queue_t inline *xxx_get_queue(kdev_t dev) +{ + int major = MAJOR(dev); + struct blk_dev_struct *bdev = blk_dev + major; + + if (bdev->queue) + return bdev->queue(dev); + + return &blk_dev[major].request_queue; +} + +void inline blkdev_release_request(struct request *req) +{ + request_queue_t *q = xxx_get_queue(req->rq_dev); + + if (req->cmd == WRITE) + q->writes_pending--; + + req->rq_status = RQ_INACTIVE; + + /* + * Request may not have originated from ll_rw_blk + */ + if (req->cmd == READA || req->cmd == READ || req->cmd == WRITE || + req->cmd == WRITERAW) { + list_add(&req->table, &q->request_head); + wake_up(&q->wait_for_request); + } +} + /* * Has to be called with the request spinlock aquired */ @@ -482,9 +516,8 @@ req->bhtail->b_reqnext = next->bh; req->bhtail = next->bhtail; req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; - next->rq_status = RQ_INACTIVE; + blkdev_release_request(next); list_del(&next->queue); - wake_up (&wait_for_request); } static inline void attempt_back_merge(request_queue_t * q, @@ -517,8 +550,8 @@ int major = MAJOR(bh->b_rdev); unsigned int sector, count; int max_segments = MAX_SEGMENTS; - struct request * req; - int rw_ahead, max_req, max_sectors; + struct request * req = NULL; + int rw_ahead, max_sectors; unsigned long flags; int orig_latency, latency, starving, sequence; @@ -557,7 +590,6 @@ if (buffer_uptodate(bh)) /* Hmmph! Already have it */ goto end_io; kstat.pgpgin++; - max_req = NR_REQUEST; /* reads take precedence */ break; case WRITERAW: rw = WRITE; @@ -573,8 +605,8 @@ * as they take precedence. The last third of the * requests are only for reads. */ + q->writes_pending++; kstat.pgpgout++; - max_req = (NR_REQUEST * 2) / 3; break; default: BUG(); @@ -599,13 +631,6 @@ /* look for a free request. */ /* - * Loop uses two requests, 1 for loop and 1 for the real device. 
- * Cut max_req in half to avoid running out and deadlocking. - */ - if ((major == LOOP_MAJOR) || (major == NBD_MAJOR)) - max_req >>= 1; - - /* * Try to coalesce the new request with old requests */ max_sectors = get_max_sectors(bh->b_rdev); @@ -617,7 +642,7 @@ * Now we acquire the request spinlock, we have to be mega careful * not to schedule or do something nonatomic */ - spin_lock_irqsave(&io_request_lock,flags); + spin_lock_irqsave(&q->request_lock,flags); elevator_debug(q, bh->b_rdev); if (list_empty(head)) { @@ -725,27 +750,31 @@ continue; q->elevator.sequence++; - spin_unlock_irqrestore(&io_request_lock,flags); + spin_unlock_irqrestore(&q->request_lock,flags); return; } /* find an unused request. */ get_rq: - req = get_request(max_req, bh->b_rdev); + if (q->writes_pending < q->writes_max) + req = get_request(q, bh->b_rdev); /* * if no request available: if rw_ahead, forget it, * otherwise try again blocking.. */ - if (!req) { - spin_unlock_irqrestore(&io_request_lock,flags); + if (req == NULL) { + spin_unlock_irqrestore(&q->request_lock, flags); if (rw_ahead) goto end_io; - req = __get_request_wait(max_req, bh->b_rdev); - spin_lock_irqsave(&io_request_lock,flags); - /* revalidate elevator */ + req = __get_request_wait(q, bh->b_rdev); + spin_lock_irqsave(&q->request_lock, flags); + + /* + * revalidate elevator, queue request_lock was dropped + */ head = &q->queue_head; if (q->head_active && !q->plugged) head = head->next; @@ -767,7 +796,7 @@ add_request(q, req, head, orig_latency); elevator_account_request(elevator, req); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->request_lock, flags); return; end_io: @@ -807,10 +836,10 @@ * the IO request? (normal case) */ __make_request(q, rw, bh); - spin_lock_irqsave(&io_request_lock,flags); + spin_lock_irqsave(&q->request_lock,flags); if (q && !q->plugged) (q->request_fn)(q); - spin_unlock_irqrestore(&io_request_lock,flags); + spin_unlock_irqrestore(&q->request_lock,flags); return 0; } @@ -953,13 +982,12 @@ BUG(); if (req->sem != NULL) up(req->sem); - req->rq_status = RQ_INACTIVE; - wake_up(&wait_for_request); + + blkdev_release_request(req); } int __init blk_dev_init(void) { - struct request * req; struct blk_dev_struct *dev; for (dev = blk_dev + MAX_BLKDEV; dev-- != blk_dev;) { @@ -967,10 +995,6 @@ blk_init_queue(&dev->request_queue, NULL); } - req = all_requests + NR_REQUEST; - while (--req >= all_requests) { - req->rq_status = RQ_INACTIVE; - } memset(ro_bits,0,sizeof(ro_bits)); memset(max_readahead, 0, sizeof(max_readahead)); memset(max_sectors, 0, sizeof(max_sectors)); @@ -1092,3 +1116,4 @@ EXPORT_SYMBOL(blk_queue_pluggable); EXPORT_SYMBOL(blk_queue_make_request); EXPORT_SYMBOL(generic_make_request); +EXPORT_SYMBOL(blkdev_release_request); diff -ur --exclude-from /home/axboe/cdrom/exclude-from /opt/tmp/linux-2.3.99-pre6-2/drivers/block/loop.c linux/drivers/block/loop.c --- /opt/tmp/linux-2.3.99-pre6-2/drivers/block/loop.c Tue Mar 21 20:30:08 2000 +++ linux/drivers/block/loop.c Thu Apr 13 22:23:12 2000 @@ -319,7 +319,7 @@ block++; offset -= blksize; } - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->request_lock); while (len > 0) { @@ -365,7 +365,7 @@ file_backed: pos = ((loff_t)current_request->sector << 9) + lo->lo_offset; - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->request_lock); if (current_request->cmd == WRITE) { if (lo_send(lo, dest_addr, len, pos, blksize)) goto error_out_lock; } else { if (lo_receive(lo, dest_addr, len, pos, blksize)) goto error_out_lock; } done: - spin_lock_irq(&io_request_lock); + 
spin_lock_irq(&q->request_lock); current_request->sector += current_request->current_nr_sectors; current_request->nr_sectors -= current_request->current_nr_sectors; list_add(¤t_request->queue, &q->queue_head); end_request(1); goto repeat; error_out_lock: - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->request_lock); error_out: list_add(¤t_request->queue, &q->queue_head); end_request(0); diff -ur --exclude-from /home/axboe/cdrom/exclude-from /opt/tmp/linux-2.3.99-pre6-2/drivers/block/nbd.c linux/drivers/block/nbd.c --- /opt/tmp/linux-2.3.99-pre6-2/drivers/block/nbd.c Tue Apr 11 15:59:13 2000 +++ linux/drivers/block/nbd.c Thu Apr 13 22:24:12 2000 @@ -325,22 +325,21 @@ #endif req->errors = 0; blkdev_dequeue_request(req); - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->request_lock); down (&lo->queue_lock); + spin_lock_irq(&q->request_lock); list_add(&req->queue, &lo->queue_head); nbd_send_req(lo->sock, req); /* Why does this block? */ up (&lo->queue_lock); - - spin_lock_irq(&io_request_lock); continue; error_out: req->errors++; blkdev_dequeue_request(req); - spin_unlock(&io_request_lock); + spin_unlock(&q->request_lock); nbd_end_request(req); - spin_lock(&io_request_lock); + spin_lock(&q->request_lock); } return; } diff -ur --exclude-from /home/axboe/cdrom/exclude-from /opt/tmp/linux-2.3.99-pre6-2/drivers/ide/ide-disk.c linux/drivers/ide/ide-disk.c --- /opt/tmp/linux-2.3.99-pre6-2/drivers/ide/ide-disk.c Mon Apr 3 22:30:00 2000 +++ linux/drivers/ide/ide-disk.c Fri Apr 14 00:46:05 2000 @@ -260,7 +260,7 @@ drive->name, rq->sector, (unsigned long) rq->buffer, nsect, rq->nr_sectors - nsect); #endif - spin_lock_irqsave(&io_request_lock, flags); /* Is this really necessary? */ + spin_lock_irqsave(&drive->queue.request_lock, flags); /* Is this really necessary? */ #ifdef CONFIG_BLK_DEV_PDC4030 rq->sector += nsect; #endif @@ -269,7 +269,7 @@ printk("%s: multwrite: count=%d, current=%ld\n", drive->name, nsect, rq->nr_sectors); #endif - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&drive->queue.request_lock, flags); break; } if ((rq->current_nr_sectors -= nsect) == 0) { @@ -277,18 +277,18 @@ rq->current_nr_sectors = rq->bh->b_size>>9; rq->buffer = rq->bh->b_data; } else { - spin_unlock_irqrestore(&io_request_lock, flags); printk("%s: buffer list corrupted (%ld, %ld, %d)\n", drive->name, rq->current_nr_sectors, rq->nr_sectors, nsect); ide_end_request(0, hwgroup); + spin_unlock_irqrestore(&drive->queue.request_lock, flags); return 1; } } else { /* Fix the pointer.. 
we ate data */ rq->buffer += nsect << 9; } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&drive->queue.request_lock, flags); } while (mcount); return 0; } @@ -464,10 +464,10 @@ ide_set_handler (drive, &multwrite_intr, WAIT_CMD, NULL); if (ide_multwrite(drive, drive->mult_count)) { unsigned long flags; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&drive->queue.request_lock, flags); hwgroup->handler = NULL; del_timer(&hwgroup->timer); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&drive->queue.request_lock, flags); return ide_stopped; } } else { diff -ur --exclude-from /home/axboe/cdrom/exclude-from /opt/tmp/linux-2.3.99-pre6-2/drivers/ide/ide.c linux/drivers/ide/ide.c --- /opt/tmp/linux-2.3.99-pre6-2/drivers/ide/ide.c Wed Apr 5 04:25:14 2000 +++ linux/drivers/ide/ide.c Fri Apr 14 00:51:03 2000 @@ -491,11 +491,7 @@ */ void ide_end_request (byte uptodate, ide_hwgroup_t *hwgroup) { - struct request *rq; - unsigned long flags; - - spin_lock_irqsave(&io_request_lock, flags); - rq = hwgroup->rq; + struct request *rq = hwgroup->rq; if (!end_that_request_first(rq, uptodate, hwgroup->drive->name)) { add_blkdev_randomness(MAJOR(rq->rq_dev)); @@ -503,7 +499,6 @@ hwgroup->rq = NULL; end_that_request_last(rq); } - spin_unlock_irqrestore(&io_request_lock, flags); } /* @@ -519,7 +514,7 @@ unsigned long flags; ide_hwgroup_t *hwgroup = HWGROUP(drive); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&drive->queue.request_lock, flags); if (hwgroup->handler != NULL) { printk("%s: ide_set_handler: handler not null; old=%p, new=%p\n", drive->name, hwgroup->handler, handler); @@ -528,7 +523,7 @@ hwgroup->expiry = expiry; hwgroup->timer.expires = jiffies + timeout; add_timer(&hwgroup->timer); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&drive->queue.request_lock, flags); } /* @@ -768,11 +763,11 @@ args[2] = IN_BYTE(IDE_NSECTOR_REG); } } - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&drive->queue.request_lock, flags); blkdev_dequeue_request(rq); HWGROUP(drive)->rq = NULL; - rq->rq_status = RQ_INACTIVE; - spin_unlock_irqrestore(&io_request_lock, flags); + blkdev_release_request(rq); + spin_unlock_irqrestore(&drive->queue.request_lock, flags); if (rq->sem != NULL) up(rq->sem); /* inform originator that rq has been serviced */ } @@ -869,6 +864,7 @@ */ ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, byte stat) { + unsigned long flags; struct request *rq; byte err; @@ -905,8 +901,11 @@ if (rq->errors >= ERROR_MAX) { if (drive->driver != NULL) DRIVER(drive)->end_request(0, HWGROUP(drive)); - else + else { + spin_lock_irqsave(&drive->queue.request_lock, flags); ide_end_request(0, HWGROUP(drive)); + spin_unlock_irqrestore(&drive->queue.request_lock, flags); + } } else { if ((rq->errors & ERROR_RESET) == ERROR_RESET) { ++rq->errors; @@ -1071,11 +1070,18 @@ static ide_startstop_t start_request (ide_drive_t *drive) { ide_startstop_t startstop; - unsigned long block, blockend; - struct request *rq = blkdev_entry_next_request(&drive->queue.queue_head); - unsigned int minor = MINOR(rq->rq_dev), unit = minor >> PARTN_BITS; + unsigned long block, blockend, flags; + struct request *rq; + unsigned int minor, unit; ide_hwif_t *hwif = HWIF(drive); + spin_lock_irqsave(&drive->queue.request_lock, flags); + rq = blkdev_entry_next_request(&drive->queue.queue_head); + spin_unlock_irqrestore(&drive->queue.request_lock, flags); + + minor = MINOR(rq->rq_dev); + unit = minor >> 
PARTN_BITS; + #ifdef DEBUG printk("%s: start_request: current=0x%08lx\n", hwif->name, (unsigned long) rq); #endif @@ -1194,7 +1200,7 @@ /* * Issue a new request to a drive from hwgroup - * Caller must have already done spin_lock_irqsave(&io_request_lock, ..); + * Caller must have already aquired request queue spinlock * * A hwgroup is a serialized group of IDE interfaces. Usually there is * exactly one hwif (interface) per hwgroup, but buggy controllers (eg. CMD640) @@ -1206,8 +1212,9 @@ * possibly along with many other devices. This is especially common in * PCI-based systems with off-board IDE controller cards. * - * The IDE driver uses the single global io_request_lock spinlock to protect - * access to the request queues, and to protect the hwgroup->busy flag. + * The IDE driver uses the spin lock associated with the hwgroup + * request queue to protect access to the request queues, and to protect + * the hwgroup->busy flag. * * The first thread into the driver for a particular hwgroup sets the * hwgroup->busy flag to indicate that this hwgroup is now active, @@ -1222,10 +1229,11 @@ * will start the next request from the queue. If no more work remains, * the driver will clear the hwgroup->busy flag and exit. * - * The io_request_lock (spinlock) is used to protect all access to the - * hwgroup->busy flag, but is otherwise not needed for most processing in - * the driver. This makes the driver much more friendlier to shared IRQs - * than previous designs, while remaining 100% (?) SMP safe and capable. + * The drive request queue request_lock (spinlock) is used to protect all + * access to the hwgroup->busy flag, but is otherwise not needed for most + * processing in the driver. This makes the driver much more friendly to + * shared IRQs than previous designs, while remaining 100% (?) SMP safe and + * capable. */ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq) { @@ -1294,10 +1302,8 @@ */ if (hwif->irq != masked_irq) disable_irq_nosync(hwif->irq); - spin_unlock(&io_request_lock); ide__sti(); /* allow other IRQs while we start this request */ startstop = start_request(drive); - spin_lock_irq(&io_request_lock); if (hwif->irq != masked_irq) enable_irq(hwif->irq); if (startstop == ide_stopped) @@ -1396,7 +1402,7 @@ unsigned long flags; unsigned long wait; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&hwgroup->drive->queue.request_lock, flags); del_timer(&hwgroup->timer); if ((handler = hwgroup->handler) == NULL) { @@ -1428,7 +1434,7 @@ /* reset timer */ hwgroup->timer.expires = jiffies + wait; add_timer(&hwgroup->timer); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&hwgroup->drive->queue.request_lock, flags); return; } } @@ -1438,7 +1444,7 @@ * the handler() function, which means we need to globally * mask the specific IRQ: */ - spin_unlock(&io_request_lock); + spin_unlock(&hwgroup->drive->queue.request_lock); hwif = HWIF(drive); disable_irq(hwif->irq); /* disable_irq_nosync ?? 
*/ __cli(); /* local CPU only, as if we were handling an interrupt */ @@ -1461,13 +1467,13 @@ set_recovery_timer(hwif); drive->service_time = jiffies - drive->service_start; enable_irq(hwif->irq); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&hwgroup->drive->queue.request_lock); if (startstop == ide_stopped) hwgroup->busy = 0; } } ide_do_request(hwgroup, 0); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&hwgroup->drive->queue.request_lock, flags); } /* @@ -1530,11 +1536,11 @@ ide_handler_t *handler; ide_startstop_t startstop; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&hwgroup->drive->queue.request_lock, flags); hwif = hwgroup->hwif; if (!ide_ack_intr(hwif)) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&hwgroup->drive->queue.request_lock, flags); return; } @@ -1568,7 +1574,7 @@ (void) IN_BYTE(hwif->io_ports[IDE_STATUS_OFFSET]); #endif /* CONFIG_BLK_DEV_IDEPCI */ } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&hwgroup->drive->queue.request_lock, flags); return; } drive = hwgroup->drive; @@ -1576,7 +1582,7 @@ /* * This should NEVER happen, and there isn't much we could do about it here. */ - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&drive->queue.request_lock, flags); return; } if (!drive_is_ready(drive)) { @@ -1586,7 +1592,7 @@ * the IRQ before their status register is up to date. Hopefully we have * enough advance overhead that the latter isn't a problem. */ - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&drive->queue.request_lock, flags); return; } if (!hwgroup->busy) { @@ -1595,12 +1601,12 @@ } hwgroup->handler = NULL; del_timer(&hwgroup->timer); - spin_unlock(&io_request_lock); + spin_unlock(&drive->queue.request_lock); if (drive->unmask) ide__sti(); /* local CPU only */ startstop = handler(drive); /* service this interrupt, may set handler for next interrupt */ - spin_lock_irq(&io_request_lock); + spin_lock_irq(&drive->queue.request_lock); /* * Note that handler() may have set things up for another @@ -1619,7 +1625,7 @@ printk("%s: ide_intr: huh? expected NULL handler on exit\n", drive->name); } } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&drive->queue.request_lock, flags); } /* @@ -1712,7 +1718,7 @@ rq->rq_dev = MKDEV(major,(drive->select.b.unit)<sem = &sem; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&drive->queue.request_lock, flags); queue_head = &drive->queue.queue_head; if (list_empty(queue_head) || action == ide_preempt) { if (action == ide_preempt) @@ -1725,7 +1731,7 @@ } list_add(&rq->queue, queue_head); ide_do_request(hwgroup, 0); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&drive->queue.request_lock, flags); if (action == ide_wait) { down(&sem); /* wait for it to be serviced */ return rq->errors ? 
-EIO : 0; /* return -EIO if errors */ @@ -1754,14 +1760,14 @@ major = MAJOR(i_rdev); minor = drive->select.b.unit << PARTN_BITS; hwgroup = HWGROUP(drive); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&drive->queue.request_lock, flags); if (drive->busy || (drive->usage > 1)) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&drive->queue.request_lock, flags); return -EBUSY; }; drive->busy = 1; MOD_INC_USE_COUNT; - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&drive->queue.request_lock, flags); for (p = 0; p < (1<part[p].nr_sects > 0) { @@ -2288,7 +2291,7 @@ unsigned long flags; if ((setting->rw & SETTING_READ)) { - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&drive->queue.request_lock, flags); switch(setting->data_type) { case TYPE_BYTE: val = *((u8 *) setting->data); @@ -2301,7 +2304,7 @@ val = *((u32 *) setting->data); break; } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&drive->queue.request_lock, flags); } return val; } @@ -2311,7 +2314,7 @@ ide_hwgroup_t *hwgroup = HWGROUP(drive); unsigned long timeout = jiffies + (3 * HZ); - spin_lock_irqsave(&io_request_lock, *flags); + spin_lock_irqsave(&drive->queue.request_lock, *flags); while (hwgroup->busy) { unsigned long lflags; spin_unlock_irqrestore(&io_request_lock, *flags); diff -ur --exclude-from /home/axboe/cdrom/exclude-from /opt/tmp/linux-2.3.99-pre6-2/drivers/scsi/scsi_error.c linux/drivers/scsi/scsi_error.c --- /opt/tmp/linux-2.3.99-pre6-2/drivers/scsi/scsi_error.c Thu Apr 13 21:56:01 2000 +++ linux/drivers/scsi/scsi_error.c Thu Apr 13 23:08:05 2000 @@ -1253,7 +1253,6 @@ * now that error recovery is done, we will need to ensure that these * requests are started. */ - spin_lock_irqsave(&io_request_lock, flags); for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) { request_queue_t *q; if ((host->can_queue > 0 && (host->host_busy >= host->can_queue)) @@ -1263,9 +1262,10 @@ break; } q = &SDpnt->request_queue; + spin_lock_irqsave(&q->request_lock, flags); q->request_fn(q); + spin_unlock_irqrestore(&q->request_lock, flags); } - spin_unlock_irqrestore(&io_request_lock, flags); } /* diff -ur --exclude-from /home/axboe/cdrom/exclude-from /opt/tmp/linux-2.3.99-pre6-2/drivers/scsi/scsi_lib.c linux/drivers/scsi/scsi_lib.c --- /opt/tmp/linux-2.3.99-pre6-2/drivers/scsi/scsi_lib.c Thu Apr 13 21:56:01 2000 +++ linux/drivers/scsi/scsi_lib.c Thu Apr 13 22:34:57 2000 @@ -95,7 +95,7 @@ * head of the queue for things like a QUEUE_FULL message from a * device, or a host that is unable to accept a particular command. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&q->request_lock, flags); if (at_head) { list_add(&SCpnt->request.queue, &q->queue_head); @@ -117,7 +117,7 @@ * the host can queue it, then send it off. */ q->request_fn(q); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->request_lock, flags); return 0; } @@ -165,7 +165,7 @@ * head of the queue for things like a QUEUE_FULL message from a * device, or a host that is unable to accept a particular command. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&q->request_lock, flags); if (at_head) { list_add(&SRpnt->sr_request.queue, &q->queue_head); @@ -187,7 +187,7 @@ * the host can queue it, then send it off. 
*/ q->request_fn(q); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->request_lock, flags); return 0; } @@ -289,9 +289,9 @@ Scsi_Device *SDpnt; struct Scsi_Host *SHpnt; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&q->request_lock, 0); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&q->request_lock, flags); if (SCpnt != NULL) { /* @@ -367,7 +367,7 @@ SHpnt->some_device_starved = 0; } } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->request_lock, flags); } /* @@ -839,7 +839,7 @@ struct Scsi_Host *SHpnt; struct Scsi_Device_Template *STpnt; - ASSERT_LOCK(&io_request_lock, 1); + ASSERT_LOCK(&q->request_lock, 1); SDpnt = (Scsi_Device *) q->queuedata; if (!SDpnt) { @@ -919,9 +919,9 @@ */ SDpnt->was_reset = 0; if (SDpnt->removable && !in_interrupt()) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->request_lock); scsi_ioctl(SDpnt, SCSI_IOCTL_DOORLOCK, 0); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->request_lock); continue; } } @@ -1016,11 +1016,11 @@ memcpy(&SCpnt->request, req, sizeof(struct request)); /* - * We have copied the data out of the request block - it is now in - * a field in SCpnt. Release the request block. + * We have copied the data out of the request block - + * it is now in a field in SCpnt. Release the + * request block. */ - req->rq_status = RQ_INACTIVE; - wake_up(&wait_for_request); + blkdev_release_request(req); } /* * Now it is finally safe to release the lock. We are @@ -1029,7 +1029,7 @@ * another. */ req = NULL; - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->request_lock); if (SCpnt->request.cmd != SPECIAL) { /* @@ -1053,7 +1053,7 @@ if (!SDpnt->scsi_init_io_fn(SCpnt)) { scsi_end_request(SCpnt, 0, SCpnt->request.nr_sectors); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->request_lock); SHpnt->host_busy--; SDpnt->device_busy--; continue; @@ -1065,7 +1065,7 @@ scsi_release_buffers(SCpnt); scsi_end_request(SCpnt, 0, SCpnt->request.nr_sectors); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->request_lock); SHpnt->host_busy--; SDpnt->device_busy--; continue; @@ -1086,7 +1086,7 @@ * Now we need to grab the lock again. We are about to mess with * the request queue and try to find another command. */ - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->request_lock); } } diff -ur --exclude-from /home/axboe/cdrom/exclude-from /opt/tmp/linux-2.3.99-pre6-2/drivers/scsi/scsi_merge.c linux/drivers/scsi/scsi_merge.c --- /opt/tmp/linux-2.3.99-pre6-2/drivers/scsi/scsi_merge.c Mon Mar 13 04:32:58 2000 +++ linux/drivers/scsi/scsi_merge.c Thu Apr 13 22:31:31 2000 @@ -386,7 +386,7 @@ * Returns: 1 if it is OK to merge the block into the request. 0 * if it is not OK. * - * Lock status: io_request_lock is assumed to be held here. + * Lock status: request queue request_lock is assumed to be held here. * * Notes: Some drivers have limited scatter-gather table sizes, and * thus they cannot queue an infinitely large command. This @@ -523,7 +523,7 @@ * Returns: 1 if it is OK to merge the block into the request. 0 * if it is not OK. * - * Lock status: io_request_lock is assumed to be held here. + * Lock status: request queue request_lock is assumed to be held here. * * Notes: Optimized for different cases depending upon whether * ISA DMA is in use and whether clustering should be used. @@ -572,7 +572,7 @@ * Returns: 1 if it is OK to merge the two requests. 0 * if it is not OK. * - * Lock status: io_request_lock is assumed to be held here. 
+ * Lock status: request queue request_lock is assumed to be held here. * * Notes: Some drivers have limited scatter-gather table sizes, and * thus they cannot queue an infinitely large command. This @@ -723,7 +723,7 @@ * Returns: 1 if it is OK to merge the block into the request. 0 * if it is not OK. * - * Lock status: io_request_lock is assumed to be held here. + * Lock status: request queue request_lock is assumed to be held here. * * Notes: Optimized for different cases depending upon whether * ISA DMA is in use and whether clustering should be used. diff -ur --exclude-from /home/axboe/cdrom/exclude-from /opt/tmp/linux-2.3.99-pre6-2/include/linux/blkdev.h linux/include/linux/blkdev.h --- /opt/tmp/linux-2.3.99-pre6-2/include/linux/blkdev.h Fri Apr 14 00:10:01 2000 +++ linux/include/linux/blkdev.h Fri Apr 14 00:53:27 2000 @@ -19,6 +19,7 @@ struct request { struct list_head queue; int elevator_sequence; + struct list_head table; volatile int rq_status; /* should split this into a few status bits */ #define RQ_INACTIVE (-1) @@ -60,9 +61,12 @@ typedef void (plug_device_fn) (request_queue_t *q, kdev_t device); typedef void (unplug_device_fn) (void *q); +#define QUEUE_NR_REQUEST 128 + struct request_queue { struct list_head queue_head; + struct list_head request_head; /* together with queue_head for cacheline sharing */ elevator_t elevator; @@ -92,6 +96,12 @@ * not. */ char head_active; + + unsigned int writes_max; + unsigned int writes_pending; + struct request *request_table; + spinlock_t request_lock; + wait_queue_head_t wait_for_request; }; struct blk_dev_struct { @@ -118,13 +128,13 @@ extern struct sec_size * blk_sec[MAX_BLKDEV]; extern struct blk_dev_struct blk_dev[MAX_BLKDEV]; -extern wait_queue_head_t wait_for_request; extern void grok_partitions(struct gendisk *dev, int drive, unsigned minors, long size); extern void register_disk(struct gendisk *dev, kdev_t first, unsigned minors, struct block_device_operations *ops, long size); extern void generic_unplug_device(void * data); extern int generic_make_request(request_queue_t *q, int rw, struct buffer_head * bh); extern request_queue_t * blk_get_queue(kdev_t dev); +extern void blkdev_release_request(struct request *); /* * Access functions for manipulating queue properties diff -ur --exclude-from /home/axboe/cdrom/exclude-from /opt/tmp/linux-2.3.99-pre6-2/kernel/ksyms.c linux/kernel/ksyms.c --- /opt/tmp/linux-2.3.99-pre6-2/kernel/ksyms.c Thu Apr 13 21:56:02 2000 +++ linux/kernel/ksyms.c Thu Apr 13 21:58:48 2000 @@ -268,7 +268,6 @@ /* block device driver support */ EXPORT_SYMBOL(block_read); EXPORT_SYMBOL(block_write); -EXPORT_SYMBOL(wait_for_request); EXPORT_SYMBOL(blksize_size); EXPORT_SYMBOL(hardsect_size); EXPORT_SYMBOL(blk_size);
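
Notes on the per-queue request handling above (illustration only, not part of the patch):

With this change each request_queue_t owns its own table of QUEUE_NR_REQUEST (128) requests, kept on q->request_head as a free list, instead of sharing the global all_requests[] array, and each queue gets its own request_lock and wait_for_request queue. Writes are capped at writes_max = 2/3 of the table so reads always find free requests. The sketch below is a stand-alone user-space model of that accounting: the kernel list/lock/wait primitives are replaced by trivial stand-ins, blkdev_release_request() takes the queue explicitly instead of looking it up from the request, and the writes_pending bump that __make_request() does is shown in main(). Treat it as a model of the bookkeeping, not kernel code.

/* User-space model of the per-queue request table (assumed simplifications noted above). */
#include <stdio.h>

#define QUEUE_NR_REQUEST 128
#define READ  0
#define WRITE 1

struct request {
	struct request *next;		/* stands in for the list_head "table" member */
	int cmd;
};

struct request_queue {
	struct request table[QUEUE_NR_REQUEST];
	struct request *free_list;	/* stands in for q->request_head */
	unsigned int writes_max;
	unsigned int writes_pending;
};

static void blk_request_table_init(struct request_queue *q)
{
	int i;

	q->free_list = NULL;
	q->writes_pending = 0;
	q->writes_max = (2 * QUEUE_NR_REQUEST) / 3;	/* as in the patch */
	for (i = 0; i < QUEUE_NR_REQUEST; i++) {
		q->table[i].next = q->free_list;
		q->free_list = &q->table[i];
	}
}

/* get_request(): pop a free request off this queue's table, NULL if empty */
static struct request *get_request(struct request_queue *q)
{
	struct request *rq = q->free_list;

	if (rq != NULL)
		q->free_list = rq->next;
	return rq;
}

/*
 * blkdev_release_request(): put the request back on its queue's free list
 * and drop the pending-write count; the kernel version also does
 * wake_up(&q->wait_for_request).
 */
static void blkdev_release_request(struct request_queue *q, struct request *rq)
{
	if (rq->cmd == WRITE)
		q->writes_pending--;
	rq->next = q->free_list;
	q->free_list = rq;
}

int main(void)
{
	struct request_queue q;
	struct request *rq;
	int writes = 0;

	blk_request_table_init(&q);

	/*
	 * Mirror the get_rq: path in __make_request(): a WRITE is only given
	 * a request while fewer than writes_max writes are outstanding.
	 */
	while (q.writes_pending < q.writes_max &&
	       (rq = get_request(&q)) != NULL) {
		rq->cmd = WRITE;
		q.writes_pending++;	/* done in __make_request() in the patch */
		writes++;
	}
	printf("writes admitted: %d (writes_max %u of %d requests)\n",
	       writes, q.writes_max, QUEUE_NR_REQUEST);

	/* reads are not throttled and still find a free request */
	rq = get_request(&q);
	printf("read got a request: %s\n", rq ? "yes" : "no");
	if (rq) {
		rq->cmd = READ;
		blkdev_release_request(&q, rq);
	}
	return 0;
}

With QUEUE_NR_REQUEST at 128 this admits 85 writes before the cap is hit, while a read still gets a request immediately, which is the "last third of the requests are only for reads" behaviour the old global table provided.

The other half of the patch is the locking rule: request_fn() is now entered under the queue's own q->request_lock instead of the global io_request_lock (see generic_make_request() and the SCSI changes), so a driver that sleeps or does long work inside its request function must drop q->request_lock and re-take it around that section, as the DAC960, loop and nbd changes above do, and must free requests with blkdev_release_request() rather than setting RQ_INACTIVE and waking wait_for_request by hand.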