Re: [PATCH] Allow NBD to be used locally

From: Pavel Machek
Date: Sat Feb 02 2008 - 06:23:35 EST


On Fri 2008-02-01 14:25:32, Laurent Vivier wrote:
> This patch allows Network Block Device to be mounted locally.

What is local nbd good for? Use loop instead...

> It creates a kthread to avoid the deadlock described in NBD tools documentation.
> So, if nbd-client hangs waiting pages, the kblockd thread can continue its
> work and free pages.

Hmm, and if there are no other pages that can be freed? Unlikely, but
can happen AFAICT.

Pavel



> Signed-off-by: Laurent Vivier <Laurent.Vivier@xxxxxxxx>
> ---
> drivers/block/nbd.c | 146 ++++++++++++++++++++++++++++++++++-----------------
> include/linux/nbd.h | 4 +-
> 2 files changed, 100 insertions(+), 50 deletions(-)
>
> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> index b4c0888..de6685e 100644
> --- a/drivers/block/nbd.c
> +++ b/drivers/block/nbd.c
> @@ -29,6 +29,7 @@
> #include <linux/kernel.h>
> #include <net/sock.h>
> #include <linux/net.h>
> +#include <linux/kthread.h>
>
> #include <asm/uaccess.h>
> #include <asm/system.h>
> @@ -434,6 +435,87 @@ static void nbd_clear_que(struct nbd_device *lo)
> }
>
>
> +static void nbd_handle_req(struct nbd_device *lo, struct request *req)
> +{
> + if (!blk_fs_request(req))
> + goto error_out;
> +
> + nbd_cmd(req) = NBD_CMD_READ;
> + if (rq_data_dir(req) == WRITE) {
> + nbd_cmd(req) = NBD_CMD_WRITE;
> + if (lo->flags & NBD_READ_ONLY) {
> + printk(KERN_ERR "%s: Write on read-only\n",
> + lo->disk->disk_name);
> + goto error_out;
> + }
> + }
> +
> + req->errors = 0;
> +
> + mutex_lock(&lo->tx_lock);
> + if (unlikely(!lo->sock)) {
> + mutex_unlock(&lo->tx_lock);
> + printk(KERN_ERR "%s: Attempted send on closed socket\n",
> + lo->disk->disk_name);
> + req->errors++;
> + nbd_end_request(req);
> + return;
> + }
> +
> + lo->active_req = req;
> +
> + if (nbd_send_req(lo, req) != 0) {
> + printk(KERN_ERR "%s: Request send failed\n",
> + lo->disk->disk_name);
> + req->errors++;
> + nbd_end_request(req);
> + } else {
> + spin_lock(&lo->queue_lock);
> + list_add(&req->queuelist, &lo->queue_head);
> + spin_unlock(&lo->queue_lock);
> + }
> +
> + lo->active_req = NULL;
> + mutex_unlock(&lo->tx_lock);
> + wake_up_all(&lo->active_wq);
> +
> + return;
> +
> +error_out:
> + req->errors++;
> + nbd_end_request(req);
> +}
> +
> +static int nbd_thread(void *data)
> +{
> + struct nbd_device *lo = data;
> + struct request *req;
> +
> + set_user_nice(current, -20);
> + while (!kthread_should_stop() || !list_empty(&lo->waiting_queue)) {
> + /* wait something to do */
> + wait_event_interruptible(lo->waiting_wq,
> + kthread_should_stop() ||
> + !list_empty(&lo->waiting_queue));
> +
> + /* extract request */
> +
> + if (list_empty(&lo->waiting_queue))
> + continue;
> +
> + spin_lock_irq(&lo->queue_lock);
> + req = list_entry(lo->waiting_queue.next, struct request,
> + queuelist);
> + list_del_init(&req->queuelist);
> + spin_unlock_irq(&lo->queue_lock);
> +
> + /* handle request */
> +
> + nbd_handle_req(lo, req);
> + }
> + return 0;
> +}
> +
> /*
> * We always wait for result of write, for now. It would be nice to make it optional
> * in future
> @@ -449,65 +531,23 @@ static void do_nbd_request(struct request_queue * q)
> struct nbd_device *lo;
>
> blkdev_dequeue_request(req);
> +
> + spin_unlock_irq(q->queue_lock);
> +
> dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n",
> req->rq_disk->disk_name, req, req->cmd_type);
>
> - if (!blk_fs_request(req))
> - goto error_out;
> -
> lo = req->rq_disk->private_data;
>
> BUG_ON(lo->magic != LO_MAGIC);
>
> - nbd_cmd(req) = NBD_CMD_READ;
> - if (rq_data_dir(req) == WRITE) {
> - nbd_cmd(req) = NBD_CMD_WRITE;
> - if (lo->flags & NBD_READ_ONLY) {
> - printk(KERN_ERR "%s: Write on read-only\n",
> - lo->disk->disk_name);
> - goto error_out;
> - }
> - }
> + spin_lock_irq(&lo->queue_lock);
> + list_add_tail(&req->queuelist, &lo->waiting_queue);
> + spin_unlock_irq(&lo->queue_lock);
>
> - req->errors = 0;
> - spin_unlock_irq(q->queue_lock);
> -
> - mutex_lock(&lo->tx_lock);
> - if (unlikely(!lo->sock)) {
> - mutex_unlock(&lo->tx_lock);
> - printk(KERN_ERR "%s: Attempted send on closed socket\n",
> - lo->disk->disk_name);
> - req->errors++;
> - nbd_end_request(req);
> - spin_lock_irq(q->queue_lock);
> - continue;
> - }
> -
> - lo->active_req = req;
> -
> - if (nbd_send_req(lo, req) != 0) {
> - printk(KERN_ERR "%s: Request send failed\n",
> - lo->disk->disk_name);
> - req->errors++;
> - nbd_end_request(req);
> - } else {
> - spin_lock(&lo->queue_lock);
> - list_add(&req->queuelist, &lo->queue_head);
> - spin_unlock(&lo->queue_lock);
> - }
> -
> - lo->active_req = NULL;
> - mutex_unlock(&lo->tx_lock);
> - wake_up_all(&lo->active_wq);
> + wake_up(&lo->waiting_wq);
>
> spin_lock_irq(q->queue_lock);
> - continue;
> -
> -error_out:
> - req->errors++;
> - spin_unlock(q->queue_lock);
> - nbd_end_request(req);
> - spin_lock(q->queue_lock);
> }
> }
>
> @@ -517,6 +557,7 @@ static int nbd_ioctl(struct inode *inode, struct file *file,
> struct nbd_device *lo = inode->i_bdev->bd_disk->private_data;
> int error;
> struct request sreq ;
> + struct task_struct *thread;
>
> if (!capable(CAP_SYS_ADMIN))
> return -EPERM;
> @@ -599,7 +640,12 @@ static int nbd_ioctl(struct inode *inode, struct file *file,
> case NBD_DO_IT:
> if (!lo->file)
> return -EINVAL;
> + thread = kthread_create(nbd_thread, lo, lo->disk->disk_name);
> + if (IS_ERR(thread))
> + return PTR_ERR(thread);
> + wake_up_process(thread);
> error = nbd_do_it(lo);
> + kthread_stop(thread);
> if (error)
> return error;
> sock_shutdown(lo, 1);
> @@ -684,10 +730,12 @@ static int __init nbd_init(void)
> nbd_dev[i].file = NULL;
> nbd_dev[i].magic = LO_MAGIC;
> nbd_dev[i].flags = 0;
> + INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
> spin_lock_init(&nbd_dev[i].queue_lock);
> INIT_LIST_HEAD(&nbd_dev[i].queue_head);
> mutex_init(&nbd_dev[i].tx_lock);
> init_waitqueue_head(&nbd_dev[i].active_wq);
> + init_waitqueue_head(&nbd_dev[i].waiting_wq);
> nbd_dev[i].blksize = 1024;
> nbd_dev[i].bytesize = 0;
> disk->major = NBD_MAJOR;
> diff --git a/include/linux/nbd.h b/include/linux/nbd.h
> index cc2b472..94f40c9 100644
> --- a/include/linux/nbd.h
> +++ b/include/linux/nbd.h
> @@ -57,9 +57,11 @@ struct nbd_device {
> int magic;
>
> spinlock_t queue_lock;
> - struct list_head queue_head;/* Requests are added here... */
> + struct list_head queue_head; /* Requests waiting result */
> struct request *active_req;
> wait_queue_head_t active_wq;
> + struct list_head waiting_queue; /* Requests to be sent */
> + wait_queue_head_t waiting_wq;
>
> struct mutex tx_lock;
> struct gendisk *disk;

--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/