Re: Processes receive SIGSEGV if TCQ is enabled

From: Nick Piggin
Date: Thu Oct 30 2003 - 18:41:39 EST


Hi,
If you're testing IDE TCQ, please try the following patch and use the
default io scheduler. It won't fix anything, but it poisons requests
so we can sometimes tell if they are being used in the wrong places.
I have seen warnings that lead me to believe this might be happening.
Its against 2.6.0-test9-mm1. Report any stack traces you see. Thanks.

Nick

linux-2.6-npiggin/drivers/block/as-iosched.c | 69 ++++++++++++++++++++-------
1 files changed, 53 insertions(+), 16 deletions(-)

diff -puN drivers/block/as-iosched.c~as-req-debug drivers/block/as-iosched.c
--- linux-2.6/drivers/block/as-iosched.c~as-req-debug 2003-10-31 09:42:27.000000000 +1100
+++ linux-2.6-npiggin/drivers/block/as-iosched.c 2003-10-31 10:33:00.000000000 +1100
@@ -136,6 +136,10 @@ enum arq_state {
scheduler */
AS_RQ_DISPATCHED, /* On the dispatch list. It belongs to the
driver now */
+ AS_RQ_PRESCHED, /* Debug poisoning for requests being used */
+ AS_RQ_REMOVED,
+ AS_RQ_MERGED,
+ AS_RQ_POSTSCHED, /* when they shouldn't be */
};

struct as_rq {
@@ -897,8 +901,14 @@ static void as_completed_request(request

WARN_ON(!list_empty(&rq->queuelist));

- if (unlikely(arq->state != AS_RQ_DISPATCHED))
- return;
+ if (arq->state == AS_RQ_MERGED)
+ goto out;
+
+ if (arq->state != AS_RQ_REMOVED) {
+ printk("arq->state %d\n", arq->state);
+ WARN_ON(1);
+ goto out;
+ }

if (ad->changed_batch && ad->nr_dispatched == 1) {
WARN_ON(ad->batch_data_dir == arq->is_sync);
@@ -926,7 +936,7 @@ static void as_completed_request(request
}

if (!arq->io_context)
- return;
+ goto out;

if (ad->io_context == arq->io_context) {
ad->antic_start = jiffies;
@@ -942,7 +952,7 @@ static void as_completed_request(request

aic = arq->io_context->aic;
if (!aic)
- return;
+ goto out;

spin_lock(&aic->lock);
if (arq->is_sync == REQ_SYNC) {
@@ -952,6 +962,9 @@ static void as_completed_request(request
spin_unlock(&aic->lock);

put_io_context(arq->io_context);
+
+out:
+ arq->state = AS_RQ_POSTSCHED;
}

/*
@@ -1020,14 +1033,14 @@ static void as_remove_request(request_qu
struct as_rq *arq = RQ_DATA(rq);

if (unlikely(arq->state == AS_RQ_NEW))
- return;
-
- if (!arq) {
- WARN_ON(1);
- return;
- }
+ goto out;

if (ON_RB(&arq->rb_node)) {
+ if (arq->state != AS_RQ_QUEUED) {
+ printk("arq->state %d\n", arq->state);
+ WARN_ON(1);
+ goto out;
+ }
/*
* We'll lose the aliased request(s) here. I don't think this
* will ever happen, but if it does, hopefully someone will
@@ -1035,8 +1048,16 @@ static void as_remove_request(request_qu
*/
WARN_ON(!list_empty(&rq->queuelist));
as_remove_queued_request(q, rq);
- } else
+ } else {
+ if (arq->state != AS_RQ_DISPATCHED) {
+ printk("arq->state %d\n", arq->state);
+ WARN_ON(1);
+ goto out;
+ }
as_remove_dispatched_request(q, rq);
+ }
+out:
+ arq->state = AS_RQ_REMOVED;
}

/*
@@ -1276,8 +1297,6 @@ fifo_expired:
ad->new_batch = 1;

ad->changed_batch = 0;
-
-// arq->request->flags |= REQ_SOFTBARRIER;
}

/*
@@ -1402,6 +1421,11 @@ static void as_requeue_request(request_q
struct as_rq *arq = RQ_DATA(rq);

if (arq) {
+ if (arq->state != AS_RQ_REMOVED) {
+ printk("arq->state %d\n", arq->state);
+ WARN_ON(1);
+ }
+
arq->state = AS_RQ_DISPATCHED;
if (arq->io_context && arq->io_context->aic)
atomic_inc(&arq->io_context->aic->nr_dispatched);
@@ -1421,12 +1445,18 @@ as_insert_request(request_queue_t *q, st
struct as_data *ad = q->elevator.elevator_data;
struct as_rq *arq = RQ_DATA(rq);

-#if 0
+ if (arq) {
+ if (arq->state != AS_RQ_PRESCHED) {
+ printk("arq->state: %d\n", arq->state);
+ WARN_ON(1);
+ }
+ arq->state = AS_RQ_NEW;
+ }
+
/* barriers must flush the reorder queue */
if (unlikely(rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)
&& where == ELEVATOR_INSERT_SORT))
where = ELEVATOR_INSERT_BACK;
-#endif

switch (where) {
case ELEVATOR_INSERT_BACK:
@@ -1662,6 +1692,8 @@ as_merged_requests(request_queue_t *q, s
*/
as_remove_queued_request(q, next);
put_io_context(anext->io_context);
+
+ anext->state = AS_RQ_MERGED;
}

/*
@@ -1694,6 +1726,11 @@ static void as_put_request(request_queue
return;
}

+ if (arq->state != AS_RQ_POSTSCHED) {
+ printk("arq->state %d\n", arq->state);
+ WARN_ON(1);
+ }
+
mempool_free(arq, ad->arq_pool);
rq->elevator_private = NULL;
}
@@ -1707,7 +1744,7 @@ static int as_set_request(request_queue_
memset(arq, 0, sizeof(*arq));
RB_CLEAR(&arq->rb_node);
arq->request = rq;
- arq->state = AS_RQ_NEW;
+ arq->state = AS_RQ_PRESCHED;
arq->io_context = NULL;
INIT_LIST_HEAD(&arq->hash);
arq->on_hash = 0;

_