Fwd: A problem about IO scheduler in kernel

From: 韩磊
Date: Fri Nov 15 2013 - 02:04:14 EST


---------- Forwarded message ----------
From: 韩磊 <bonben1989@xxxxxxxxx>
Date: 2013/11/15
Subject: A problem about IO scheduler in kernel
To: Linux Kernel Mailing List <linux-kernel@xxxxxxxxxxxxxxx>


These days I was programming about IO scheduler called
"Simple_Deadline" in kernel.
"Simple_Deadline" is based on "deadline-iosched".The algorithm is very
simple.It have two lists,one is read,the other is write.A request
enter lists based its weight which count accord to the request's
size,read or write. And when dispatch a request just compare the
weight between read list and write list,the smaller one dispatches.

When I modprobe this module and run it,if a bit of IO come,it works
well.But when runs a large number IO,the system will crash. Can you
help me to find the problem? I am so sad and helpless about it.

When system crashed,the screen display some information:

Message from syslogd@han at Nov 15 13:12:03 ...
kernel:------------[ cut here ]------------

Message from syslogd@han at Nov 15 13:12:03 ...
kernel:invalid opcode: 0000 [#1] SMP

Message from syslogd@han at Nov 15 13:12:03 ...
kernel:last sysfs file:
/sys/devices/pci0000:00/0000:00:04.0/0000:05:00.0/host0/port-0:0/end_device-0:0/target0:0:0/0:0:0:0/block/sda/queue/scheduler

Message from syslogd@han at Nov 15 13:12:03 ...
kernel:Stack:

Message from syslogd@han at Nov 15 13:12:03 ...
kernel:Call Trace:

Message from syslogd@han at Nov 15 13:12:03 ...
kernel:Code: 2a 48 89 fe 4c 89 e7 e8 35 ff 01 00 48 8b 83 80 00 00 00
83 e0 03 4c 09 e0 4c 8b 64 24 08 48 89 83 80 00 00 00 48 8b 1c 24 c9
c3 <0f> 0b eb fe 66 90 55 48 89 e5 0f 1f 44 00 00 45 31 c0 48 89 f9


The code in accessory! Please help find the bugs! Thank you!
/*
* Deadline i/o scheduler.
*
* Copyright (C) 2002 Jens Axboe <axboe@xxxxxxxxx>
*/
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>

static const int read_expire = HZ / 2;
static const int write_expire = 5 * HZ;
static const int writes_starved = 2;
static const int fifo_batch = 16;




#define DELTA 1

static const int read_time_per_byte = 30 ;
static const int write_time_per_byte = 30 ;


static const int read_time_once_transfer = 30 ;
static const int write_time_once_transfer = 300 ;

struct req_list_head
{
struct request *current_req;
unsigned long last_jiffies;
long current_weight;
struct req_list_head *next;
struct req_list_head *prev;
};

struct deadline_data {

struct req_list_head req_weight_list[2];
struct request *next_rq[2];
struct rb_root sort_list[2];
struct list_head fifo_list[2];
unsigned int batching; /* number of sequential requests made */
sector_t last_sector; /* head position */
unsigned int starved; /* times reads have starved writes */
int fifo_expire[2];
int fifo_batch;
int writes_starved;
int front_merges;

};
void req_list_remove_request(struct req_list_head *list_head);
struct request *select_req_from_weight_list(struct req_list_head *req_write_list,struct req_list_head *req_read_list);
int req_list_empty(struct req_list_head *list_head);
void req_list_add(struct req_list_head *new_req,struct req_list_head *list_head);
void update_req_list_weight(struct req_list_head *list_head);
long req_list_count_weight(int data_dir,unsigned int bio_size);
void Init_req_list_head(struct req_list_head *req_weight_list);
static void deadline_move_request(struct deadline_data *, struct request *);

static inline struct rb_root *
deadline_rb_root(struct deadline_data *dd, struct request *rq)
{
return &dd->sort_list[rq_data_dir(rq)];
}

static inline struct request *
deadline_latter_request(struct request *rq)
{
struct rb_node *node = rb_next(&rq->rb_node);

if (node)
return rb_entry_rq(node);

return NULL;
}

static void
deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
{
struct rb_root *root = deadline_rb_root(dd, rq);
struct request *__alias;

while (unlikely(__alias = elv_rb_add(root, rq)))
deadline_move_request(dd, __alias);
}

static inline void
deadline_del_rq_rb(struct deadline_data *dd, struct request *rq)
{
const int data_dir = rq_data_dir(rq);

if (dd->next_rq[data_dir] == rq)
dd->next_rq[data_dir] = deadline_latter_request(rq);

elv_rb_del(deadline_rb_root(dd, rq), rq);
}

/*
* add rq to rbtree and fifo
*/
static void
deadline_add_request(struct request_queue *q, struct request *rq)
{
printk("enter merge_add_request\n");
struct deadline_data *dd = q->elevator->elevator_data;
const int data_dir = rq_data_dir(rq);

unsigned int bio_size=rq->__data_len;


struct req_list_head *req_list=(struct req_list_head *)kmalloc(sizeof(*req_list),GFP_KERNEL);
req_list->current_req=rq;
req_list->current_weight=req_list_count_weight(data_dir,bio_size);
req_list->last_jiffies=jiffies;
req_list_add(req_list,&dd->req_weight_list[data_dir]);

deadline_add_rq_rb(dd, rq);
rq_set_fifo_time(rq, jiffies + dd->fifo_expire[data_dir]);
list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
printk("Quit merge_add_request\n");
}

/*
* remove rq from rbtree and fifo.
*/
static void deadline_remove_request(struct request_queue *q, struct request *rq)
{
struct deadline_data *dd = q->elevator->elevator_data;

rq_fifo_clear(rq);
deadline_del_rq_rb(dd, rq);
}

static int
deadline_merge(struct request_queue *q, struct request **req, struct bio *bio)
{

struct deadline_data *dd = q->elevator->elevator_data;
struct request *__rq;
int ret;

/*
* check for front merge
*/
if (dd->front_merges)
{
sector_t sector = bio->bi_sector + bio_sectors(bio);

__rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
if (__rq)
{
BUG_ON(sector != blk_rq_pos(__rq));
if (elv_rq_merge_ok(__rq, bio))
{
ret = ELEVATOR_FRONT_MERGE;
goto out;
}
}
}

return ELEVATOR_NO_MERGE;
out:
*req = __rq;
return ret;

}

static void deadline_merged_request(struct request_queue *q,
struct request *req, int type)
{
struct deadline_data *dd = q->elevator->elevator_data;

/*
* if the merge was a front merge, we need to reposition request
*/
if (type == ELEVATOR_FRONT_MERGE)
{
elv_rb_del(deadline_rb_root(dd, req), req);
deadline_add_rq_rb(dd, req);
}
/*
* if the merge was a front merge, we need to reposition request
*/
}

static void
deadline_merged_requests(struct request_queue *q, struct request *req,
struct request *next)
{
/*
* if next expires before rq, assign its expire time to rq
* and move into next position (next will be deleted) in fifo
*/
if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist))
{
if (time_before(rq_fifo_time(next), rq_fifo_time(req)))
{
list_move(&req->queuelist, &next->queuelist);
rq_set_fifo_time(req, rq_fifo_time(next));
}
}

/*
* kill knowledge of next, this one is a goner
*/
deadline_remove_request(q, next);

}

/*
* move request from sort list to dispatch queue.
*/
static inline void
deadline_move_to_dispatch(struct deadline_data *dd, struct request *rq)
{
struct request_queue *q = rq->q;
const int data_dir = rq_data_dir(rq);
req_list_remove_request(&(dd->req_weight_list[data_dir]));

deadline_remove_request(q, rq);
elv_dispatch_add_tail(q, rq);
}

/*
* move an entry to dispatch queue
*/
static void
deadline_move_request(struct deadline_data *dd, struct request *rq)
{
const int data_dir = rq_data_dir(rq);

dd->next_rq[READ] = NULL;
dd->next_rq[WRITE] = NULL;
dd->next_rq[data_dir] = deadline_latter_request(rq);

dd->last_sector = rq_end_sector(rq);
deadline_move_to_dispatch(dd, rq);
}

/*
* deadline_check_fifo returns 0 if there are no expired requests on the fifo,
* 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
*/
static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
{
struct request *rq = rq_entry_fifo(dd->fifo_list[ddir].next);
if (time_after(jiffies, rq_fifo_time(rq)))
return 1;

return 0;
}

/*
* deadline_dispatch_requests selects the best request according to
* read/write expire, fifo_batch, etc
*/

static int deadline_dispatch_requests(struct request_queue *q, int force)
{
printk("enter merge_dispatch_request\n");
struct deadline_data *dd = q->elevator->elevator_data;
const int reads = !req_list_empty(&dd->req_weight_list[READ]);
const int writes = !req_list_empty(&dd->req_weight_list[WRITE]);
struct request *rq;

if(reads&&writes)
{
rq=select_req_from_weight_list(&dd->req_weight_list[READ],&dd->req_weight_list[WRITE]);
goto dispatch_request;
}
else if(reads)
{
rq=dd->req_weight_list[READ].next->current_req;
goto dispatch_request;
}
else if(writes)
{
rq=dd->req_weight_list[WRITE].next->current_req;
goto dispatch_request;
}
else
return 0;
dispatch_request:
deadline_move_request(dd, rq);
printk("enter merge_dispatch_request\n");
return 1;
}

static int deadline_queue_empty(struct request_queue *q)
{
struct deadline_data *dd = q->elevator->elevator_data;

return req_list_empty(&dd->req_weight_list[WRITE])&&req_list_empty(&dd->req_weight_list[READ]);
}

static void deadline_exit_queue(struct elevator_queue *e)
{
struct deadline_data *dd = e->elevator_data;

BUG_ON(!req_list_empty(&dd->req_weight_list[WRITE]));
BUG_ON(!req_list_empty(&dd->req_weight_list[READ]));


kfree(dd);
}

/*
* initialize elevator private data (deadline_data).
*/
static void *deadline_init_queue(struct request_queue *q)
{

struct deadline_data *dd;
printk("enter merge_init_queue\n");
dd = kmalloc_node(sizeof(*dd), GFP_KERNEL | __GFP_ZERO, q->node);
if (!dd)
return NULL;

Init_req_list_head(&(dd->req_weight_list[READ]));
Init_req_list_head(&(dd->req_weight_list[WRITE]));
dd->fifo_expire[READ] = read_expire;
dd->fifo_expire[WRITE] = write_expire;
dd->writes_starved = writes_starved;
dd->front_merges =0;
dd->fifo_batch = fifo_batch;

/*hl */
INIT_LIST_HEAD(&dd->fifo_list[READ]);
INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
dd->sort_list[READ] = RB_ROOT;
dd->sort_list[WRITE] = RB_ROOT;
printk("quit merge_init_queue\n");
return dd;
}

void Init_req_list_head(struct req_list_head *req_weight_list)
{
req_weight_list->next=req_weight_list;
req_weight_list->prev=req_weight_list;
}

long req_list_count_weight(int data_dir,unsigned int bio_size)
{
long weight;
if(data_dir==READ)
{
weight=read_expire*DELTA*1000-(bio_size)*read_time_per_byte*(10-DELTA)/1000-read_time_once_transfer*10;
return weight;
}
else
{
weight=write_expire*DELTA*1000-(bio_size)*write_time_per_byte*(10-DELTA)/1000-write_time_once_transfer*10;
return weight;
}
}


void update_req_list_weight(struct req_list_head *list_head)
{
if(req_list_empty(list_head))
return ;
struct req_list_head *list_update=list_head;
struct req_list_head *list_update_head=list_head;
while(list_update->next!=list_update_head)
{
list_update=list_update->next;
unsigned long go_by_jiffies;
go_by_jiffies=jiffies - list_update->last_jiffies;
list_update->last_jiffies=jiffies;
list_update->current_weight=list_update->current_weight-go_by_jiffies*1000000*DELTA/HZ;
}
}
void req_list_add(struct req_list_head *new_req,struct req_list_head *list_head)
{
printk("enter merge_req_list_add\n");
update_req_list_weight(list_head);

if(list_head->next==list_head&&list_head->prev==list_head)
{
list_head->next=new_req;
new_req->prev=list_head;
list_head->prev=new_req;
new_req->next=list_head;
return ;
}
struct req_list_head *head_temp=list_head;
struct req_list_head *head_list_temp=list_head;

while(1)
{
if(new_req->current_weight>=head_temp->next->current_weight)
{
if(head_temp->next->next==head_list_temp)
{
struct req_list_head *temp_temp=head_temp->next;
temp_temp->next=new_req;
new_req->next=head_list_temp;
new_req->prev=temp_temp;
head_list_temp->prev=new_req;
break;
}
head_temp=head_temp->next;
}
else
{
struct req_list_head *next_req=head_temp->next;
head_temp->next=new_req;
new_req->next=next_req;
next_req->prev=new_req;
new_req->prev=head_temp;
break;
}
}
printk("quit merge_req_list_add\n");
}

int req_list_empty(struct req_list_head *list_head)
{
if((list_head->next==list_head)&&(list_head->prev==list_head))
return 1;
else
return 0;
}

struct request *select_req_from_weight_list(struct req_list_head *req_read_list,struct req_list_head *req_write_list)
{
update_req_list_weight(req_read_list);
update_req_list_weight(req_write_list);
if(req_write_list->next->current_weight<req_read_list->next->current_weight)
return req_write_list->next->current_req;
else
return req_read_list->next->current_req;
}

void req_list_remove_request(struct req_list_head *list_head)
{
printk("enter req_list_remove_request!\n");
if(!req_list_empty(list_head))
{
struct req_list_head *list_req=list_head->next;
list_head->next=list_req->next;
list_req->next->prev=list_head;
list_req->next=list_req;
list_req->prev=list_req;
kfree(list_req);
}
printk("quit req_list_remove_request!\n");
}

static ssize_t
deadline_var_show(int var, char *page)
{
return sprintf(page, "%d\n", var);
}

static ssize_t
deadline_var_store(int *var, const char *page, size_t count)
{
char *p = (char *) page;

*var = simple_strtol(p, &p, 10);
return count;
}

#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
static ssize_t __FUNC(struct elevator_queue *e, char *page) \
{ \
struct deadline_data *dd = e->elevator_data; \
int __data = __VAR; \
if (__CONV) \
__data = jiffies_to_msecs(__data); \
return deadline_var_show(__data, (page)); \
}
SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 1);
SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[WRITE], 1);
SHOW_FUNCTION(deadline_writes_starved_show, dd->writes_starved, 0);
SHOW_FUNCTION(deadline_front_merges_show, dd->front_merges, 0);
SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0);
#undef SHOW_FUNCTION

#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
{ \
struct deadline_data *dd = e->elevator_data; \
int __data; \
int ret = deadline_var_store(&__data, (page), count); \
if (__data < (MIN)) \
__data = (MIN); \
else if (__data > (MAX)) \
__data = (MAX); \
if (__CONV) \
*(__PTR) = msecs_to_jiffies(__data); \
else \
*(__PTR) = __data; \
return ret; \
}
STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1);
STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1);
STORE_FUNCTION(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0);
STORE_FUNCTION(deadline_front_merges_store, &dd->front_merges, 0, 1, 0);
STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
#undef STORE_FUNCTION

#define DD_ATTR(name) \
__ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, \
deadline_##name##_store)

static struct elv_fs_entry deadline_attrs[] = {
DD_ATTR(read_expire),
DD_ATTR(write_expire),
DD_ATTR(writes_starved),
DD_ATTR(front_merges),
DD_ATTR(fifo_batch),
__ATTR_NULL
};

static struct elevator_type iosched_deadline_simple = {
.ops = {
.elevator_merge_fn = deadline_merge,
.elevator_merged_fn = deadline_merged_request,
.elevator_merge_req_fn = deadline_merged_requests,
.elevator_dispatch_fn = deadline_dispatch_requests,
.elevator_add_req_fn = deadline_add_request,
.elevator_queue_empty_fn = deadline_queue_empty,
.elevator_former_req_fn = elv_rb_former_request,
.elevator_latter_req_fn = elv_rb_latter_request,
.elevator_init_fn = deadline_init_queue,
.elevator_exit_fn = deadline_exit_queue,
},

.elevator_attrs = deadline_attrs,
.elevator_name = "deadline_simple",
.elevator_owner = THIS_MODULE,
};

static int __init deadline_simple_init(void)
{
elv_register(&iosched_deadline_simple);

return 0;
}

static void __exit deadline_simple_exit(void)
{
elv_unregister(&iosched_deadline_simple);
}

module_init(deadline_simple_init);
module_exit(deadline_simple_exit);

MODULE_AUTHOR("Han");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("deadline_simple IO scheduler");