diff -ur --exclude-from /home/axboe/cdrom/exclude-from /opt/tmp/linux-2.3.99-pre6-3/drivers/block/ll_rw_blk.c linux/drivers/block/ll_rw_blk.c
--- /opt/tmp/linux-2.3.99-pre6-3/drivers/block/ll_rw_blk.c	Sun Apr 16 16:02:11 2000
+++ linux/drivers/block/ll_rw_blk.c	Mon Apr 17 04:00:20 2000
@@ -37,10 +37,11 @@
 #endif
 
 /*
- * The request-struct contains all necessary data
- * to load a nr of sectors into memory
+ * The request free lists: one for reads, one for writes, and a third
+ * for loop/nbd
  */
-static struct request all_requests[NR_REQUEST];
+#define LOOP_LIST	2
+static struct list_head request_freelist[3];
 
 /*
  * The "disk" task queue is used to start the actual requests
@@ -282,38 +283,56 @@
  * NOTE: interrupts must be disabled on the way in (on SMP the request queue
  * spinlock has to be aquired), and will still be disabled on the way out.
  */
-static inline struct request * get_request(int n, kdev_t dev)
+static inline struct request * get_request(int rw, kdev_t dev)
 {
-	static struct request *prev_found = NULL, *prev_limit = NULL;
-	register struct request *req, *limit;
+	struct list_head *list;
+	struct request *rq = NULL;
 
-	if (n <= 0)
-		panic("get_request(%d): impossible!\n", n);
+	if ((MAJOR(dev) == LOOP_MAJOR) || (MAJOR(dev) == NBD_MAJOR)) {
+		if (!list_empty(&request_freelist[LOOP_LIST])) {
+			list = &request_freelist[LOOP_LIST];
+			rq = list_entry(list->next, struct request, table);
+			goto out;
+		}
+		return NULL;
+	}
+
+	/*
+	 * Reads get preferential treatment and are allowed to steal
+	 * from the write free list if necessary.
+	 */
+	list = &request_freelist[rw];
+	if (!list_empty(list))
+		rq = list_entry(list->next, struct request, table);
 
-	limit = all_requests + n;
-	if (limit != prev_limit) {
-		prev_limit = limit;
-		prev_found = all_requests;
+	/*
+	 * No request. If write list is non-empty, rw is READ and we
+	 * grab one from there if possible. If rw is WRITE, just
+	 * forget it.
+	 */
+	if (rq == NULL) {
+		if (!list_empty(&request_freelist[WRITE])) {
+			list = &request_freelist[WRITE];
+			rq = list_entry(list->next, struct request, table);
+		}
 	}
-	req = prev_found;
-	for (;;) {
-		req = ((req > all_requests) ? req : limit) - 1;
-		if (req->rq_status == RQ_INACTIVE)
-			break;
-		if (req == prev_found)
-			return NULL;
+
+	if (rq) {
+out:
+		list_del(&rq->table);
+		rq->rq_dev = dev;
+		rq->free_list = list;
+		rq->rq_status = RQ_ACTIVE;
+		rq->special = NULL;
 	}
-	prev_found = req;
-	req->rq_status = RQ_ACTIVE;
-	req->rq_dev = dev;
-	req->special = NULL;
-	return req;
+
+	return rq;
 }
 
 /*
  * wait until a free request in the first N entries is available.
  */
-static struct request * __get_request_wait(int n, kdev_t dev)
+static struct request * __get_request_wait(int rw, kdev_t dev)
 {
 	register struct request *req;
 	DECLARE_WAITQUEUE(wait, current);
@@ -323,7 +342,7 @@
 	for (;;) {
 		__set_current_state(TASK_UNINTERRUPTIBLE|TASK_EXCLUSIVE);
 		spin_lock_irqsave(&io_request_lock,flags);
-		req = get_request(n, dev);
+		req = get_request(rw, dev);
 		spin_unlock_irqrestore(&io_request_lock,flags);
 		if (req)
 			break;
@@ -335,17 +354,17 @@
 	return req;
 }
 
-static inline struct request * get_request_wait(int n, kdev_t dev)
+static inline struct request * get_request_wait(int rw, kdev_t dev)
 {
 	register struct request *req;
 	unsigned long flags;
 
 	spin_lock_irqsave(&io_request_lock,flags);
-	req = get_request(n, dev);
+	req = get_request(rw, dev);
 	spin_unlock_irqrestore(&io_request_lock,flags);
 	if (req)
 		return req;
-	return __get_request_wait(n, dev);
+	return __get_request_wait(rw, dev);
 }
 
 /* RO fail safe mechanism */
@@ -454,6 +473,17 @@
 	}
 }
 
+inline void blkdev_release_request(struct request *rq)
+{
+	rq->rq_status = RQ_INACTIVE;
+
+	if (rq->free_list) {
+		list_add(&rq->table, rq->free_list);
+		rq->free_list = NULL;
+		wake_up(&wait_for_request);
+	}
+}
+
 /*
  * Has to be called with the request spinlock aquired
  */
@@ -482,9 +512,8 @@
 	req->bhtail->b_reqnext = next->bh;
 	req->bhtail = next->bhtail;
 	req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
-	next->rq_status = RQ_INACTIVE;
+	blkdev_release_request(next);
 	list_del(&next->queue);
-	wake_up (&wait_for_request);
 }
 
 static inline void attempt_back_merge(request_queue_t * q,
@@ -518,7 +547,7 @@
 	unsigned int sector, count;
 	int max_segments = MAX_SEGMENTS;
 	struct request * req;
-	int rw_ahead, max_req, max_sectors;
+	int rw_ahead, max_sectors;
 	unsigned long flags;
 	int orig_latency, latency, starving, sequence;
 
@@ -557,7 +586,6 @@
 		if (buffer_uptodate(bh)) /* Hmmph! Already have it */
 			goto end_io;
 		kstat.pgpgin++;
-		max_req = NR_REQUEST;	/* reads take precedence */
 		break;
 	case WRITERAW:
 		rw = WRITE;
@@ -574,7 +602,6 @@
 		 * requests are only for reads.
 		 */
 		kstat.pgpgout++;
-		max_req = (NR_REQUEST * 2) / 3;
 		break;
 	default:
 		BUG();
@@ -599,13 +626,6 @@
 /* look for a free request. */
 
 	/*
-	 * Loop uses two requests, 1 for loop and 1 for the real device.
-	 * Cut max_req in half to avoid running out and deadlocking.
-	 */
-	if ((major == LOOP_MAJOR) || (major == NBD_MAJOR))
-		max_req >>= 1;
-
-	/*
 	 * Try to coalesce the new request with old requests
 	 */
 	max_sectors = get_max_sectors(bh->b_rdev);
@@ -732,7 +752,7 @@
 
 /* find an unused request. */
 get_rq:
-	req = get_request(max_req, bh->b_rdev);
+	req = get_request(rw, bh->b_rdev);
 
 	/*
 	 * if no request available: if rw_ahead, forget it,
@@ -742,7 +762,7 @@
 		spin_unlock_irqrestore(&io_request_lock,flags);
 		if (rw_ahead)
 			goto end_io;
-		req = __get_request_wait(max_req, bh->b_rdev);
+		req = __get_request_wait(rw, bh->b_rdev);
 		spin_lock_irqsave(&io_request_lock,flags);
 
 		/* revalidate elevator */
@@ -953,24 +973,40 @@
 		BUG();
 	if (req->sem != NULL)
 		up(req->sem);
-	req->rq_status = RQ_INACTIVE;
-	wake_up(&wait_for_request);
+	blkdev_release_request(req);
 }
 
-int __init blk_dev_init(void)
+int blk_dev_init(void)
 {
-	struct request * req;
+	struct request *table;
 	struct blk_dev_struct *dev;
+	int i;
+
+	/*
+	 * Allocate requests and add to free lists
+	 */
+	table = kmalloc((16 + NR_REQUEST) * sizeof(struct request), GFP_KERNEL);
+	INIT_LIST_HEAD(&request_freelist[READ]);
+	INIT_LIST_HEAD(&request_freelist[WRITE]);
+	INIT_LIST_HEAD(&request_freelist[LOOP_LIST]);
+	for (i = 0; i < NR_REQUEST; i++) {
+		table[i].rq_status = RQ_INACTIVE;
+		list_add(&table[i].table, &request_freelist[i & 1]);
+	}
+
+	/*
+	 * Quick hack for loop/nbd table
+	 */
+	for (; i < 16 + NR_REQUEST; i++) {
+		table[i].rq_status = RQ_INACTIVE;
+		list_add(&table[i].table, &request_freelist[LOOP_LIST]);
+	}
 
 	for (dev = blk_dev + MAX_BLKDEV; dev-- != blk_dev;) {
 		dev->queue = NULL;
 		blk_init_queue(&dev->request_queue, NULL);
 	}
 
-	req = all_requests + NR_REQUEST;
-	while (--req >= all_requests) {
-		req->rq_status = RQ_INACTIVE;
-	}
 	memset(ro_bits,0,sizeof(ro_bits));
 	memset(max_readahead, 0, sizeof(max_readahead));
 	memset(max_sectors, 0, sizeof(max_sectors));
diff -ur --exclude-from /home/axboe/cdrom/exclude-from /opt/tmp/linux-2.3.99-pre6-3/drivers/ide/ide.c linux/drivers/ide/ide.c
--- /opt/tmp/linux-2.3.99-pre6-3/drivers/ide/ide.c	Sun Apr 16 14:35:17 2000
+++ linux/drivers/ide/ide.c	Mon Apr 17 03:48:26 2000
@@ -771,7 +771,7 @@
 	spin_lock_irqsave(&io_request_lock, flags);
 	blkdev_dequeue_request(rq);
 	HWGROUP(drive)->rq = NULL;
-	rq->rq_status = RQ_INACTIVE;
+	blkdev_release_request(rq);
 	spin_unlock_irqrestore(&io_request_lock, flags);
 	if (rq->sem != NULL)
 		up(rq->sem);	/* inform originator that rq has been serviced */
@@ -1666,6 +1666,7 @@
 	rq->bh = NULL;
 	rq->bhtail = NULL;
 	rq->q = NULL;
+	rq->free_list = NULL;
 }
 
 /*
diff -ur --exclude-from /home/axboe/cdrom/exclude-from /opt/tmp/linux-2.3.99-pre6-3/drivers/scsi/scsi_lib.c linux/drivers/scsi/scsi_lib.c
--- /opt/tmp/linux-2.3.99-pre6-3/drivers/scsi/scsi_lib.c	Fri Apr 14 23:46:31 2000
+++ linux/drivers/scsi/scsi_lib.c	Mon Apr 17 00:31:51 2000
@@ -1019,8 +1019,7 @@
 		 * We have copied the data out of the request block - it is now in
 		 * a field in SCpnt. Release the request block.
 		 */
-		req->rq_status = RQ_INACTIVE;
-		wake_up(&wait_for_request);
+		blkdev_release_request(req);
 	}
 	/*
 	 * Now it is finally safe to release the lock.  We are
diff -ur --exclude-from /home/axboe/cdrom/exclude-from /opt/tmp/linux-2.3.99-pre6-3/fs/buffer.c linux/fs/buffer.c
--- /opt/tmp/linux-2.3.99-pre6-3/fs/buffer.c	Tue Mar 21 20:30:08 2000
+++ linux/fs/buffer.c	Mon Apr 17 02:12:35 2000
@@ -494,17 +494,6 @@
 	__remove_from_lru_list(bh, bh->b_list);
 }
 
-static void insert_into_queues(struct buffer_head *bh)
-{
-	struct buffer_head **head = &hash(bh->b_dev, bh->b_blocknr);
-
-	spin_lock(&lru_list_lock);
-	write_lock(&hash_table_lock);
-	__hash_link(bh, head);
-	__insert_into_lru_list(bh, bh->b_list);
-	write_unlock(&hash_table_lock);
-	spin_unlock(&lru_list_lock);
-}
 
 /* This function must only run if there are no other
  * references _anywhere_ to this buffer head.
@@ -536,24 +525,56 @@
  * will force it bad). This shouldn't really happen currently, but
 * the code is ready.
  */
-struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
+static struct buffer_head * __get_hash_table(kdev_t dev, int block, int size,
+					     struct buffer_head **head)
 {
-	struct buffer_head **head = &hash(dev, block);
 	struct buffer_head *bh;
 
-	read_lock(&hash_table_lock);
 	for(bh = *head; bh; bh = bh->b_next)
 		if (bh->b_blocknr == block	&&
-		    bh->b_size    == size	&&
+		    bh->b_size    == size	&& /* is this required? */
 		    bh->b_dev     == dev)
 			break;
 	if (bh)
 		atomic_inc(&bh->b_count);
+
+	return bh;
+}
+
+struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
+{
+	struct buffer_head **head = &hash(dev, block);
+	struct buffer_head *bh;
+
+	read_lock(&hash_table_lock);
+	bh = __get_hash_table(dev, block, size, head);
 	read_unlock(&hash_table_lock);
 
 	return bh;
 }
 
+static int insert_into_queues_unique(struct buffer_head *bh)
+{
+	struct buffer_head **head = &hash(bh->b_dev, bh->b_blocknr);
+	struct buffer_head *alias;
+	int err = 0;
+
+	spin_lock(&lru_list_lock);
+	write_lock(&hash_table_lock);
+
+	alias = __get_hash_table(bh->b_dev, bh->b_blocknr, bh->b_size, head);
+	if (!alias) {
+		__hash_link(bh, head);
+		__insert_into_lru_list(bh, bh->b_list);
+	} else
+		err = 1;
+
+	write_unlock(&hash_table_lock);
+	spin_unlock(&lru_list_lock);
+
+	return err;
+}
+
 unsigned int get_hardblocksize(kdev_t dev)
 {
 	/*
@@ -840,8 +861,16 @@
 	bh->b_blocknr = block;
 	bh->b_state = 1 << BH_Mapped;
 
-	/* Insert the buffer into the regular lists */
-	insert_into_queues(bh);
+	/* Insert the buffer into the regular lists; check no one
+	   else added it first */
+
+	if (!insert_into_queues_unique(bh))
+		goto out;
+
+	/* someone added it after we last checked the hash table */
+	put_last_free(bh);
+	goto repeat;
+ out:
 	touch_buffer(bh);
 	return bh;
 
diff -ur --exclude-from /home/axboe/cdrom/exclude-from /opt/tmp/linux-2.3.99-pre6-3/include/linux/blkdev.h linux/include/linux/blkdev.h
--- /opt/tmp/linux-2.3.99-pre6-3/include/linux/blkdev.h	Fri Apr 14 23:59:10 2000
+++ linux/include/linux/blkdev.h	Mon Apr 17 00:29:48 2000
@@ -19,6 +19,7 @@
 struct request {
 	struct list_head queue;
 	int elevator_sequence;
+	struct list_head table;
 
 	volatile int rq_status;	/* should split this into a few status bits */
 #define RQ_INACTIVE		(-1)
@@ -42,6 +43,7 @@
 	struct buffer_head * bh;
 	struct buffer_head * bhtail;
 	request_queue_t * q;
+	struct list_head *free_list;
 };
 
 #include <linux/elevator.h>
@@ -125,6 +127,7 @@
 
 extern int generic_make_request(request_queue_t *q, int rw, struct buffer_head * bh);
 extern request_queue_t * blk_get_queue(kdev_t dev);
+extern void blkdev_release_request(struct request *rq);
 
 /*
  * Access functions for manipulating queue properties
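
For readers who want to see the allocation policy of the ll_rw_blk.c hunks in isolation, here is a minimal standalone userspace sketch (not kernel code, and not part of the patch): three request free lists, reads allowed to fall back to the write list, writes never allowed to steal from reads, loop/nbd confined to their own pool, and a release path that hands a request back to the list it was taken from. All names below (mock_request, mock_get_request, mock_release_request, pop) are illustrative only; only the policy mirrors the patch.

/*
 * Userspace model of the per-type request free lists above.
 * Build with:  cc -o freelist freelist.c
 */
#include <stdio.h>

#define READ		0
#define WRITE		1
#define LOOP_LIST	2

struct mock_request {
	struct mock_request *next;	/* a singly linked free list is enough here */
	int free_list;			/* which list to give the request back to */
};

static struct mock_request *freelist[3];

static struct mock_request *pop(int which)
{
	struct mock_request *rq = freelist[which];

	if (rq) {
		freelist[which] = rq->next;
		rq->free_list = which;	/* remember where it came from */
	}
	return rq;
}

/* Mirrors get_request(): loop/nbd use a private pool, reads may steal. */
static struct mock_request *mock_get_request(int rw, int is_loop_or_nbd)
{
	struct mock_request *rq;

	if (is_loop_or_nbd)
		return pop(LOOP_LIST);

	rq = pop(rw);
	if (!rq && rw == READ)		/* reads fall back to the write list */
		rq = pop(WRITE);
	return rq;
}

/* Mirrors blkdev_release_request(): back onto the list it was taken from. */
static void mock_release_request(struct mock_request *rq)
{
	rq->next = freelist[rq->free_list];
	freelist[rq->free_list] = rq;
}

int main(void)
{
	struct mock_request one_read = { 0 }, one_write = { 0 };
	struct mock_request *rq;

	/* Seed the pools: one read request, one write request, no loop requests. */
	freelist[READ] = &one_read;
	freelist[WRITE] = &one_write;

	rq = mock_get_request(WRITE, 0);
	printf("write #1: %s\n", rq ? "ok" : "NULL");
	/* Write list now empty; writes may not steal from the read list. */
	printf("write #2: %s\n", mock_get_request(WRITE, 0) ? "ok" : "NULL");
	/* Releasing returns it to the WRITE list, so the next write works. */
	mock_release_request(rq);
	printf("write #3: %s\n", mock_get_request(WRITE, 0) ? "ok" : "NULL");
	/* Reads still have their own list, and could also steal from writes. */
	printf("read  #1: %s\n", mock_get_request(READ, 0) ? "ok" : "NULL");
	/* Loop/nbd requests come only from LOOP_LIST, which is empty here. */
	printf("loop  #1: %s\n", mock_get_request(READ, 1) ? "ok" : "NULL");
	return 0;
}

The second write comes back NULL even though a read request is still free, which is exactly the read-preference asymmetry get_request() implements; the loop/nbd pool replaces the old "cut max_req in half" deadlock avoidance hack that the patch deletes.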
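The fs/buffer.c hunks close a getblk() race: instead of hashing a freshly set-up buffer unconditionally, insert_into_queues_unique() looks for an existing alias of (dev, block) under the same locks and tells the loser to throw its buffer back and retry. Below is a small pthreads sketch of that same insert-if-no-alias-else-retry pattern; it is userspace illustration only, and every name in it (mock_bh, lookup_locked, insert_unique, racer) is invented for the example.

/*
 * Userspace model of the "insert only if no alias is hashed yet" fix.
 * Build with:  cc -pthread -o race race.c
 */
#include <pthread.h>
#include <stdio.h>

#define HASH_SIZE 64

struct mock_bh {
	struct mock_bh *next;
	int dev, block;
};

static struct mock_bh *hash_table[HASH_SIZE];
static pthread_mutex_t hash_lock = PTHREAD_MUTEX_INITIALIZER;

static unsigned hashfn(int dev, int block)
{
	return (unsigned)(dev ^ block) % HASH_SIZE;
}

/* Caller must hold hash_lock; plays the role of __get_hash_table(). */
static struct mock_bh *lookup_locked(int dev, int block)
{
	struct mock_bh *bh = hash_table[hashfn(dev, block)];

	while (bh && !(bh->dev == dev && bh->block == block))
		bh = bh->next;
	return bh;
}

/*
 * Returns 0 if bh was inserted, 1 if somebody else already hashed an
 * alias for the same (dev, block) - the insert_into_queues_unique() idea.
 */
static int insert_unique(struct mock_bh *bh)
{
	unsigned h = hashfn(bh->dev, bh->block);
	int err = 0;

	pthread_mutex_lock(&hash_lock);
	if (lookup_locked(bh->dev, bh->block)) {
		err = 1;		/* lost the race, caller must retry */
	} else {
		bh->next = hash_table[h];
		hash_table[h] = bh;
	}
	pthread_mutex_unlock(&hash_lock);
	return err;
}

static void *racer(void *arg)
{
	/* Both threads try to instantiate a buffer for (dev 3, block 42). */
	static struct mock_bh bhs[2];
	int id = *(int *)arg;

	bhs[id].dev = 3;
	bhs[id].block = 42;
	if (insert_unique(&bhs[id]))
		printf("thread %d: alias already hashed, would put_last_free() and goto repeat\n", id);
	else
		printf("thread %d: inserted its buffer\n", id);
	return NULL;
}

int main(void)
{
	pthread_t t[2];
	int ids[2] = { 0, 1 };

	pthread_create(&t[0], NULL, racer, &ids[0]);
	pthread_create(&t[1], NULL, racer, &ids[1]);
	pthread_join(t[0], NULL);
	pthread_join(t[1], NULL);
	return 0;
}

Exactly one thread ends up hashing its buffer; the other detects the alias under the lock and takes the path that corresponds to put_last_free() plus goto repeat in getblk().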