[PATCH v5 10/10] xen/blkback: make pool of persistent grants and free pages per-queue

From: Bob Liu
Date: Fri Nov 13 2015 - 22:13:20 EST


Make pool of persistent grants and free pages per-queue/ring instead of
per-device to get better scalability.

Test was done based on null_blk driver:
dom0: v4.2-rc8 16vcpus 10GB "modprobe null_blk"
domu: v4.2-rc8 16vcpus 10GB

[test]
rw=read
direct=1
ioengine=libaio
bs=4k
time_based
runtime=30
filename=/dev/xvdb
numjobs=16
iodepth=64
iodepth_batch=64
iodepth_batch_complete=64
group_reporting

Results:
iops1: After commit("xen/blkfront: make persistent grants per-queue").
iops2: After this commit.

Queues: 1 4 8 16
Iops orig(k): 810 1064 780 700
Iops1(k): 810 1230(~20%) 1024(~20%) 850(~20%)
Iops2(k): 810 1410(~35%) 1354(~75%) 1440(~100%)

With 4 queues after this commit we can get ~75% increase in IOPS, and
performance won't drop if incresing queue numbers.

Please find the respective chart in this link:
https://www.dropbox.com/s/agrcy2pbzbsvmwv/iops.png?dl=0

Signed-off-by: Bob Liu <bob.liu@xxxxxxxxxx>
---
drivers/block/xen-blkback/blkback.c | 202 ++++++++++++++++-------------------
drivers/block/xen-blkback/common.h | 32 +++---
drivers/block/xen-blkback/xenbus.c | 21 ++--
3 files changed, 118 insertions(+), 137 deletions(-)

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index acedc46..0e8a04d 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -122,60 +122,60 @@ module_param(log_stats, int, 0644);
/* Number of free pages to remove on each call to gnttab_free_pages */
#define NUM_BATCH_FREE_PAGES 10

-static inline int get_free_page(struct xen_blkif *blkif, struct page **page)
+static inline int get_free_page(struct xen_blkif_ring *ring, struct page **page)
{
unsigned long flags;

- spin_lock_irqsave(&blkif->free_pages_lock, flags);
- if (list_empty(&blkif->free_pages)) {
- BUG_ON(blkif->free_pages_num != 0);
- spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+ spin_lock_irqsave(&ring->free_pages_lock, flags);
+ if (list_empty(&ring->free_pages)) {
+ BUG_ON(ring->free_pages_num != 0);
+ spin_unlock_irqrestore(&ring->free_pages_lock, flags);
return gnttab_alloc_pages(1, page);
}
- BUG_ON(blkif->free_pages_num == 0);
- page[0] = list_first_entry(&blkif->free_pages, struct page, lru);
+ BUG_ON(ring->free_pages_num == 0);
+ page[0] = list_first_entry(&ring->free_pages, struct page, lru);
list_del(&page[0]->lru);
- blkif->free_pages_num--;
- spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+ ring->free_pages_num--;
+ spin_unlock_irqrestore(&ring->free_pages_lock, flags);

return 0;
}

-static inline void put_free_pages(struct xen_blkif *blkif, struct page **page,
+static inline void put_free_pages(struct xen_blkif_ring *ring, struct page **page,
int num)
{
unsigned long flags;
int i;

- spin_lock_irqsave(&blkif->free_pages_lock, flags);
+ spin_lock_irqsave(&ring->free_pages_lock, flags);
for (i = 0; i < num; i++)
- list_add(&page[i]->lru, &blkif->free_pages);
- blkif->free_pages_num += num;
- spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+ list_add(&page[i]->lru, &ring->free_pages);
+ ring->free_pages_num += num;
+ spin_unlock_irqrestore(&ring->free_pages_lock, flags);
}

-static inline void shrink_free_pagepool(struct xen_blkif *blkif, int num)
+static inline void shrink_free_pagepool(struct xen_blkif_ring *ring, int num)
{
/* Remove requested pages in batches of NUM_BATCH_FREE_PAGES */
struct page *page[NUM_BATCH_FREE_PAGES];
unsigned int num_pages = 0;
unsigned long flags;

- spin_lock_irqsave(&blkif->free_pages_lock, flags);
- while (blkif->free_pages_num > num) {
- BUG_ON(list_empty(&blkif->free_pages));
- page[num_pages] = list_first_entry(&blkif->free_pages,
+ spin_lock_irqsave(&ring->free_pages_lock, flags);
+ while (ring->free_pages_num > num) {
+ BUG_ON(list_empty(&ring->free_pages));
+ page[num_pages] = list_first_entry(&ring->free_pages,
struct page, lru);
list_del(&page[num_pages]->lru);
- blkif->free_pages_num--;
+ ring->free_pages_num--;
if (++num_pages == NUM_BATCH_FREE_PAGES) {
- spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+ spin_unlock_irqrestore(&ring->free_pages_lock, flags);
gnttab_free_pages(num_pages, page);
- spin_lock_irqsave(&blkif->free_pages_lock, flags);
+ spin_lock_irqsave(&ring->free_pages_lock, flags);
num_pages = 0;
}
}
- spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+ spin_unlock_irqrestore(&ring->free_pages_lock, flags);
if (num_pages != 0)
gnttab_free_pages(num_pages, page);
}
@@ -198,23 +198,29 @@ static void make_response(struct xen_blkif_ring *ring, u64 id,


/*
- * pers_gnts_lock must be used around all the persistent grant helpers
- * because blkback may use multi-thread/queue for each backend.
+ * We don't need locking around the persistent grant helpers
+ * because blkback uses a single-thread for each backend, so we
+ * can be sure that this functions will never be called recursively.
+ *
+ * The only exception to that is put_persistent_grant, that can be called
+ * from interrupt context (by xen_blkbk_unmap), so we have to use atomic
+ * bit operations to modify the flags of a persistent grant and to count
+ * the number of used grants.
*/
-static int add_persistent_gnt(struct xen_blkif *blkif,
+static int add_persistent_gnt(struct xen_blkif_ring *ring,
struct persistent_gnt *persistent_gnt)
{
struct rb_node **new = NULL, *parent = NULL;
struct persistent_gnt *this;
+ struct xen_blkif *blkif = ring->blkif;

- BUG_ON(!spin_is_locked(&blkif->pers_gnts_lock));
- if (blkif->persistent_gnt_c >= xen_blkif_max_pgrants) {
+ if (ring->persistent_gnt_c >= xen_blkif_max_pgrants) {
if (!blkif->vbd.overflow_max_grants)
blkif->vbd.overflow_max_grants = 1;
return -EBUSY;
}
/* Figure out where to put new node */
- new = &blkif->persistent_gnts.rb_node;
+ new = &ring->persistent_gnts.rb_node;
while (*new) {
this = container_of(*new, struct persistent_gnt, node);

@@ -233,20 +239,19 @@ static int add_persistent_gnt(struct xen_blkif *blkif,
set_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
/* Add new node and rebalance tree. */
rb_link_node(&(persistent_gnt->node), parent, new);
- rb_insert_color(&(persistent_gnt->node), &blkif->persistent_gnts);
- blkif->persistent_gnt_c++;
- atomic_inc(&blkif->persistent_gnt_in_use);
+ rb_insert_color(&(persistent_gnt->node), &ring->persistent_gnts);
+ ring->persistent_gnt_c++;
+ atomic_inc(&ring->persistent_gnt_in_use);
return 0;
}

-static struct persistent_gnt *get_persistent_gnt(struct xen_blkif *blkif,
+static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring,
grant_ref_t gref)
{
struct persistent_gnt *data;
struct rb_node *node = NULL;

- BUG_ON(!spin_is_locked(&blkif->pers_gnts_lock));
- node = blkif->persistent_gnts.rb_node;
+ node = ring->persistent_gnts.rb_node;
while (node) {
data = container_of(node, struct persistent_gnt, node);

@@ -260,25 +265,24 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif *blkif,
return NULL;
}
set_bit(PERSISTENT_GNT_ACTIVE, data->flags);
- atomic_inc(&blkif->persistent_gnt_in_use);
+ atomic_inc(&ring->persistent_gnt_in_use);
return data;
}
}
return NULL;
}

-static void put_persistent_gnt(struct xen_blkif *blkif,
+static void put_persistent_gnt(struct xen_blkif_ring *ring,
struct persistent_gnt *persistent_gnt)
{
- BUG_ON(!spin_is_locked(&blkif->pers_gnts_lock));
if(!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
pr_alert_ratelimited("freeing a grant already unused\n");
set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
- atomic_dec(&blkif->persistent_gnt_in_use);
+ atomic_dec(&ring->persistent_gnt_in_use);
}

-static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
+static void free_persistent_gnts(struct xen_blkif_ring *ring, struct rb_root *root,
unsigned int num)
{
struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
@@ -292,7 +296,6 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
unmap_data.unmap_ops = unmap;
unmap_data.kunmap_ops = NULL;

- BUG_ON(!spin_is_locked(&blkif->pers_gnts_lock));
foreach_grant_safe(persistent_gnt, n, root, node) {
BUG_ON(persistent_gnt->handle ==
BLKBACK_INVALID_HANDLE);
@@ -310,7 +313,7 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
unmap_data.count = segs_to_unmap;
BUG_ON(gnttab_unmap_refs_sync(&unmap_data));

- put_free_pages(blkif, pages, segs_to_unmap);
+ put_free_pages(ring, pages, segs_to_unmap);
segs_to_unmap = 0;
}

@@ -327,17 +330,15 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
struct persistent_gnt *persistent_gnt;
int segs_to_unmap = 0;
- struct xen_blkif *blkif = container_of(work, typeof(*blkif), persistent_purge_work);
+ struct xen_blkif_ring *ring = container_of(work, typeof(*ring), persistent_purge_work);
struct gntab_unmap_queue_data unmap_data;
- unsigned long flags;

unmap_data.pages = pages;
unmap_data.unmap_ops = unmap;
unmap_data.kunmap_ops = NULL;

- spin_lock_irqsave(&blkif->pers_gnts_lock, flags);
- while(!list_empty(&blkif->persistent_purge_list)) {
- persistent_gnt = list_first_entry(&blkif->persistent_purge_list,
+ while(!list_empty(&ring->persistent_purge_list)) {
+ persistent_gnt = list_first_entry(&ring->persistent_purge_list,
struct persistent_gnt,
remove_node);
list_del(&persistent_gnt->remove_node);
@@ -352,45 +353,42 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
unmap_data.count = segs_to_unmap;
BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
- put_free_pages(blkif, pages, segs_to_unmap);
+ put_free_pages(ring, pages, segs_to_unmap);
segs_to_unmap = 0;
}
kfree(persistent_gnt);
}
- spin_unlock_irqrestore(&blkif->pers_gnts_lock, flags);
if (segs_to_unmap > 0) {
unmap_data.count = segs_to_unmap;
BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
- put_free_pages(blkif, pages, segs_to_unmap);
+ put_free_pages(ring, pages, segs_to_unmap);
}
}

-static void purge_persistent_gnt(struct xen_blkif *blkif)
+static void purge_persistent_gnt(struct xen_blkif_ring *ring)
{
struct persistent_gnt *persistent_gnt;
struct rb_node *n;
unsigned int num_clean, total;
bool scan_used = false, clean_used = false;
struct rb_root *root;
- unsigned long flags;

- spin_lock_irqsave(&blkif->pers_gnts_lock, flags);
- if (blkif->persistent_gnt_c < xen_blkif_max_pgrants ||
- (blkif->persistent_gnt_c == xen_blkif_max_pgrants &&
- !blkif->vbd.overflow_max_grants)) {
+ if (ring->persistent_gnt_c < xen_blkif_max_pgrants ||
+ (ring->persistent_gnt_c == xen_blkif_max_pgrants &&
+ !ring->blkif->vbd.overflow_max_grants)) {
goto out;
}

- if (work_busy(&blkif->persistent_purge_work)) {
+ if (work_busy(&ring->persistent_purge_work)) {
pr_alert_ratelimited("Scheduled work from previous purge is still busy, cannot purge list\n");
goto out;
}

num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN;
- num_clean = blkif->persistent_gnt_c - xen_blkif_max_pgrants + num_clean;
- num_clean = min(blkif->persistent_gnt_c, num_clean);
+ num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants + num_clean;
+ num_clean = min(ring->persistent_gnt_c, num_clean);
if ((num_clean == 0) ||
- (num_clean > (blkif->persistent_gnt_c - atomic_read(&blkif->persistent_gnt_in_use))))
+ (num_clean > (ring->persistent_gnt_c - atomic_read(&ring->persistent_gnt_in_use))))
goto out;

/*
@@ -406,8 +404,8 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)

pr_debug("Going to purge %u persistent grants\n", num_clean);

- BUG_ON(!list_empty(&blkif->persistent_purge_list));
- root = &blkif->persistent_gnts;
+ BUG_ON(!list_empty(&ring->persistent_purge_list));
+ root = &ring->persistent_gnts;
purge_list:
foreach_grant_safe(persistent_gnt, n, root, node) {
BUG_ON(persistent_gnt->handle ==
@@ -426,7 +424,7 @@ purge_list:

rb_erase(&persistent_gnt->node, root);
list_add(&persistent_gnt->remove_node,
- &blkif->persistent_purge_list);
+ &ring->persistent_purge_list);
if (--num_clean == 0)
goto finished;
}
@@ -447,18 +445,14 @@ finished:
goto purge_list;
}

- blkif->persistent_gnt_c -= (total - num_clean);
- spin_unlock_irqrestore(&blkif->pers_gnts_lock, flags);
- blkif->vbd.overflow_max_grants = 0;
+ ring->persistent_gnt_c -= (total - num_clean);
+ ring->blkif->vbd.overflow_max_grants = 0;

/* We can defer this work */
- schedule_work(&blkif->persistent_purge_work);
+ schedule_work(&ring->persistent_purge_work);
pr_debug("Purged %u/%u\n", (total - num_clean), total);
- return;

out:
- spin_unlock_irqrestore(&blkif->pers_gnts_lock, flags);
-
return;
}

@@ -590,14 +584,16 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
* SCHEDULER FUNCTIONS
*/

-static void print_stats(struct xen_blkif *blkif)
+static void print_stats(struct xen_blkif_ring *ring)
{
+ struct xen_blkif *blkif = ring->blkif;
+
pr_info("(%s): oo %3llu | rd %4llu | wr %4llu | f %4llu"
" | ds %4llu | pg: %4u/%4d\n",
current->comm, blkif->st_oo_req,
blkif->st_rd_req, blkif->st_wr_req,
blkif->st_f_req, blkif->st_ds_req,
- blkif->persistent_gnt_c,
+ ring->persistent_gnt_c,
xen_blkif_max_pgrants);
blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
blkif->st_rd_req = 0;
@@ -650,23 +646,23 @@ int xen_blkif_schedule(void *arg)

purge_gnt_list:
if (blkif->vbd.feature_gnt_persistent &&
- time_after(jiffies, blkif->next_lru)) {
- purge_persistent_gnt(blkif);
- blkif->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL);
+ time_after(jiffies, ring->next_lru)) {
+ purge_persistent_gnt(ring);
+ ring->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL);
}

/* Shrink if we have more than xen_blkif_max_buffer_pages */
- shrink_free_pagepool(blkif, xen_blkif_max_buffer_pages);
+ shrink_free_pagepool(ring, xen_blkif_max_buffer_pages);

- if (log_stats && time_after(jiffies, blkif->st_print))
- print_stats(blkif);
+ if (log_stats && time_after(jiffies, ring->blkif->st_print))
+ print_stats(ring);
}

/* Drain pending purge work */
- flush_work(&blkif->persistent_purge_work);
+ flush_work(&ring->persistent_purge_work);

if (log_stats)
- print_stats(blkif);
+ print_stats(ring);

ring->xenblkd = NULL;
xen_blkif_put(blkif);
@@ -679,21 +675,16 @@ purge_gnt_list:
*/
void xen_blkbk_free_caches(struct xen_blkif_ring *ring)
{
- struct xen_blkif *blkif = ring->blkif;
- unsigned long flags;
-
/* Free all persistent grant pages */
- spin_lock_irqsave(&blkif->pers_gnts_lock, flags);
- if (!RB_EMPTY_ROOT(&blkif->persistent_gnts))
- free_persistent_gnts(blkif, &blkif->persistent_gnts,
- blkif->persistent_gnt_c);
+ if (!RB_EMPTY_ROOT(&ring->persistent_gnts))
+ free_persistent_gnts(ring, &ring->persistent_gnts,
+ ring->persistent_gnt_c);

- BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
- blkif->persistent_gnt_c = 0;
- spin_unlock_irqrestore(&blkif->pers_gnts_lock, flags);
+ BUG_ON(!RB_EMPTY_ROOT(&ring->persistent_gnts));
+ ring->persistent_gnt_c = 0;

/* Since we are shutting down remove all pages from the buffer */
- shrink_free_pagepool(blkif, 0 /* All */);
+ shrink_free_pagepool(ring, 0 /* All */);
}

static unsigned int xen_blkbk_unmap_prepare(
@@ -704,13 +695,10 @@ static unsigned int xen_blkbk_unmap_prepare(
struct page **unmap_pages)
{
unsigned int i, invcount = 0;
- unsigned long flags;

for (i = 0; i < num; i++) {
if (pages[i]->persistent_gnt != NULL) {
- spin_lock_irqsave(&ring->blkif->pers_gnts_lock, flags);
- put_persistent_gnt(ring->blkif, pages[i]->persistent_gnt);
- spin_unlock_irqrestore(&ring->blkif->pers_gnts_lock, flags);
+ put_persistent_gnt(ring, pages[i]->persistent_gnt);
continue;
}
if (pages[i]->handle == BLKBACK_INVALID_HANDLE)
@@ -735,7 +723,7 @@ static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_
but is this the best way to deal with this? */
BUG_ON(result);

- put_free_pages(blkif, data->pages, data->count);
+ put_free_pages(ring, data->pages, data->count);
make_response(ring, pending_req->id,
pending_req->operation, pending_req->status);
free_req(ring, pending_req);
@@ -802,7 +790,7 @@ static void xen_blkbk_unmap(struct xen_blkif_ring *ring,
if (invcount) {
ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount);
BUG_ON(ret);
- put_free_pages(ring->blkif, unmap_pages, invcount);
+ put_free_pages(ring, unmap_pages, invcount);
}
pages += batch;
num -= batch;
@@ -823,7 +811,6 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring,
int last_map = 0, map_until = 0;
int use_persistent_gnts;
struct xen_blkif *blkif = ring->blkif;
- unsigned long irq_flags;

use_persistent_gnts = (blkif->vbd.feature_gnt_persistent);

@@ -837,11 +824,9 @@ again:
uint32_t flags;

if (use_persistent_gnts) {
- spin_lock_irqsave(&blkif->pers_gnts_lock, irq_flags);
persistent_gnt = get_persistent_gnt(
- blkif,
+ ring,
pages[i]->gref);
- spin_unlock_irqrestore(&blkif->pers_gnts_lock, irq_flags);
}

if (persistent_gnt) {
@@ -852,7 +837,7 @@ again:
pages[i]->page = persistent_gnt->page;
pages[i]->persistent_gnt = persistent_gnt;
} else {
- if (get_free_page(blkif, &pages[i]->page))
+ if (get_free_page(ring, &pages[i]->page))
goto out_of_memory;
addr = vaddr(pages[i]->page);
pages_to_gnt[segs_to_map] = pages[i]->page;
@@ -885,7 +870,7 @@ again:
BUG_ON(new_map_idx >= segs_to_map);
if (unlikely(map[new_map_idx].status != 0)) {
pr_debug("invalid buffer -- could not remap it\n");
- put_free_pages(blkif, &pages[seg_idx]->page, 1);
+ put_free_pages(ring, &pages[seg_idx]->page, 1);
pages[seg_idx]->handle = BLKBACK_INVALID_HANDLE;
ret |= 1;
goto next;
@@ -895,7 +880,7 @@ again:
continue;
}
if (use_persistent_gnts &&
- blkif->persistent_gnt_c < xen_blkif_max_pgrants) {
+ ring->persistent_gnt_c < xen_blkif_max_pgrants) {
/*
* We are using persistent grants, the grant is
* not mapped but we might have room for it.
@@ -913,19 +898,16 @@ again:
persistent_gnt->gnt = map[new_map_idx].ref;
persistent_gnt->handle = map[new_map_idx].handle;
persistent_gnt->page = pages[seg_idx]->page;
- spin_lock_irqsave(&blkif->pers_gnts_lock, irq_flags);
- if (add_persistent_gnt(blkif,
+ if (add_persistent_gnt(ring,
persistent_gnt)) {
- spin_unlock_irqrestore(&blkif->pers_gnts_lock, irq_flags);
kfree(persistent_gnt);
persistent_gnt = NULL;
goto next;
}
pages[seg_idx]->persistent_gnt = persistent_gnt;
pr_debug("grant %u added to the tree of persistent grants, using %u/%u\n",
- persistent_gnt->gnt, blkif->persistent_gnt_c,
+ persistent_gnt->gnt, ring->persistent_gnt_c,
xen_blkif_max_pgrants);
- spin_unlock_irqrestore(&blkif->pers_gnts_lock, irq_flags);
goto next;
}
if (use_persistent_gnts && !blkif->vbd.overflow_max_grants) {
@@ -949,7 +931,7 @@ next:

out_of_memory:
pr_alert("%s: out of memory\n", __func__);
- put_free_pages(blkif, pages_to_gnt, segs_to_map);
+ put_free_pages(ring, pages_to_gnt, segs_to_map);
return -ENOMEM;
}

diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 0833dc6..54f126e 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -291,6 +291,22 @@ struct xen_blkif_ring {
spinlock_t pending_free_lock;
wait_queue_head_t pending_free_wq;

+ /* Tree to store persistent grants. */
+ spinlock_t pers_gnts_lock;
+ struct rb_root persistent_gnts;
+ unsigned int persistent_gnt_c;
+ atomic_t persistent_gnt_in_use;
+ unsigned long next_lru;
+
+ /* Used by the kworker that offload work from the persistent purge. */
+ struct list_head persistent_purge_list;
+ struct work_struct persistent_purge_work;
+
+ /* Buffer of free pages to map grant refs. */
+ spinlock_t free_pages_lock;
+ int free_pages_num;
+ struct list_head free_pages;
+
struct work_struct free_work;
/* Thread shutdown wait queue. */
wait_queue_head_t shutdown_wq;
@@ -312,22 +328,6 @@ struct xen_blkif {
struct completion drain_complete;
atomic_t drain;

- /* tree to store persistent grants */
- spinlock_t pers_gnts_lock;
- struct rb_root persistent_gnts;
- unsigned int persistent_gnt_c;
- atomic_t persistent_gnt_in_use;
- unsigned long next_lru;
-
- /* used by the kworker that offload work from the persistent purge */
- struct list_head persistent_purge_list;
- struct work_struct persistent_purge_work;
-
- /* buffer of free pages to map grant refs */
- spinlock_t free_pages_lock;
- int free_pages_num;
- struct list_head free_pages;
-
/* statistics */
unsigned long st_print;
unsigned long long st_rd_req;
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index d83b790..db8ad57 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -150,6 +150,10 @@ static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
spin_lock_init(&ring->blk_ring_lock);
init_waitqueue_head(&ring->wq);
INIT_LIST_HEAD(&ring->pending_free);
+ INIT_LIST_HEAD(&ring->persistent_purge_list);
+ INIT_WORK(&ring->persistent_purge_work, xen_blkbk_unmap_purged_grants);
+ spin_lock_init(&ring->free_pages_lock);
+ INIT_LIST_HEAD(&ring->free_pages);

spin_lock_init(&ring->pending_free_lock);
init_waitqueue_head(&ring->pending_free_wq);
@@ -175,11 +179,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
atomic_set(&blkif->refcnt, 1);
init_completion(&blkif->drain_complete);
INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
- spin_lock_init(&blkif->free_pages_lock);
- INIT_LIST_HEAD(&blkif->free_pages);
- INIT_LIST_HEAD(&blkif->persistent_purge_list);
blkif->st_print = jiffies;
- INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants);

return blkif;
}
@@ -290,6 +290,12 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
i++;
}

+ BUG_ON(atomic_read(&ring->persistent_gnt_in_use) != 0);
+ BUG_ON(!list_empty(&ring->persistent_purge_list));
+ BUG_ON(!RB_EMPTY_ROOT(&ring->persistent_gnts));
+ BUG_ON(!list_empty(&ring->free_pages));
+ BUG_ON(ring->free_pages_num != 0);
+ BUG_ON(ring->persistent_gnt_c != 0);
WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
}
blkif->nr_ring_pages = 0;
@@ -304,13 +310,6 @@ static void xen_blkif_free(struct xen_blkif *blkif)
xen_vbd_free(&blkif->vbd);

/* Make sure everything is drained before shutting down */
- BUG_ON(blkif->persistent_gnt_c != 0);
- BUG_ON(atomic_read(&blkif->persistent_gnt_in_use) != 0);
- BUG_ON(blkif->free_pages_num != 0);
- BUG_ON(!list_empty(&blkif->persistent_purge_list));
- BUG_ON(!list_empty(&blkif->free_pages));
- BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
-
kfree(blkif->rings);
kmem_cache_free(xen_blkif_cachep, blkif);
}
--
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/