Re: [Xen-devel] [PATCH RFC 4/4] xen-block: introduce a new requesttype to unmap grants

From: Wei Liu
Date: Tue Jul 09 2013 - 09:23:11 EST


On Mon, Jul 08, 2013 at 03:41:52PM -0400, Konrad Rzeszutek Wilk wrote:
> On Mon, Jul 08, 2013 at 03:03:27PM +0200, Roger Pau Monne wrote:
> > Right now blkfront has no way to unmap grant refs, if using persistent
> > grants once a grant is used blkfront cannot assure if blkback will
> > have this grant mapped or not. To solve this problem, a new request
> > type (BLKIF_OP_UNMAP) that allows requesting blkback to unmap certain
> > grants is introduced.
>
> I don't think this is the right way of doing it. It is a new operation
> (BLKIF_OP_UNMAP) that has nothing to do with READ/WRITE. All it is
> is just some way for the frontend to say: unmap this grant if you can.
>
> As such I would think a better mechanism would be to have a new
> grant mechanism that can say: 'I am done with this grant you can
> remove it' - that is called to the hypervisor. The hypervisor
> can then figure out whether it is free or not and lazily delete it.
> (And the guest would be notified when it is freed).
>
> I would presume that this problem would also exist with netback/netfront
> if it started using persisten grants, right?
>

I think so. This issue is generic enough for any backend that uses
persistent grant so I don't think we should limit it to blk only.


Wei.

> >
> > This request can only be used with the indirect operation, since if
> > not using indirect segments the maximum number of grants used will be
> > 352, which is very far from the default maximum number of grants.
> >
> > The requested grefs to unmap are placed inside the indirect pages, so
> > they can be parsed as an array of grant_ref_t once the indirect pages
> > are mapped. This prevents us from introducing a new request struct,
> > since we re-use the same struct from the indirect operation.
> >
> > Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
> > Cc: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
> > ---
> > drivers/block/xen-blkback/blkback.c | 108 +++++++++++++++++-
> > drivers/block/xen-blkback/common.h | 12 ++-
> > drivers/block/xen-blkback/xenbus.c | 10 ++
> > drivers/block/xen-blkfront.c | 223 +++++++++++++++++++++++++++++++----
> > include/xen/interface/io/blkif.h | 13 ++
> > 5 files changed, 337 insertions(+), 29 deletions(-)
> >
> > diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
> > index bf4b9d2..1def5d2 100644
> > --- a/drivers/block/xen-blkback/blkback.c
> > +++ b/drivers/block/xen-blkback/blkback.c
> > @@ -167,6 +167,9 @@ static int do_block_io_op(struct xen_blkif *blkif);
> > static int dispatch_rw_block_io(struct xen_blkif *blkif,
> > struct blkif_request *req,
> > struct pending_req *pending_req);
> > +static int dispatch_unmap(struct xen_blkif *blkif,
> > + struct blkif_request *req,
> > + struct pending_req *pending_req);
> > static void make_response(struct xen_blkif *blkif, u64 id,
> > unsigned short op, int st);
> >
> > @@ -841,7 +844,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
> > struct blkif_request_segment_aligned *segments = NULL;
> >
> > nseg = pending_req->nr_pages;
> > - indirect_grefs = INDIRECT_PAGES(nseg);
> > + indirect_grefs = INDIRECT_PAGES(nseg, SEGS_PER_INDIRECT_FRAME);
> > BUG_ON(indirect_grefs > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
> >
> > for (i = 0; i < indirect_grefs; i++)
> > @@ -1064,10 +1067,18 @@ __do_block_io_op(struct xen_blkif *blkif)
> > case BLKIF_OP_WRITE:
> > case BLKIF_OP_WRITE_BARRIER:
> > case BLKIF_OP_FLUSH_DISKCACHE:
> > - case BLKIF_OP_INDIRECT:
> > if (dispatch_rw_block_io(blkif, &req, pending_req))
> > goto done;
> > break;
> > + case BLKIF_OP_INDIRECT:
> > + if (req.u.indirect.indirect_op == BLKIF_OP_UNMAP) {
> > + if (dispatch_unmap(blkif, &req, pending_req))
> > + goto done;
> > + } else {
> > + if (dispatch_rw_block_io(blkif, &req, pending_req))
> > + goto done;
> > + }
> > + break;
> > case BLKIF_OP_DISCARD:
> > free_req(blkif, pending_req);
> > if (dispatch_discard_io(blkif, &req))
> > @@ -1311,7 +1322,100 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
> > return -EIO;
> > }
> >
> > +/*
> > + * Unmap grefs on request from blkfront. This allows blkfront to securely
> > + * free the grefs by making sure blkback no longer has them mapped.
> > + */
> > +static int dispatch_unmap(struct xen_blkif *blkif,
> > + struct blkif_request *req,
> > + struct pending_req *pending_req)
> > +{
> > + unsigned int n_grants, n_pages;
> > + int i, n, rc, segs_to_unmap = 0;
> > + struct grant_page **pages = pending_req->indirect_pages;
> > + grant_ref_t *grefs = NULL;
> > + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> > + struct page *unmap_pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> > + struct persistent_gnt *persistent_gnt;
> > +
> > + if ((req->operation != BLKIF_OP_INDIRECT) ||
> > + (req->u.indirect.indirect_op != BLKIF_OP_UNMAP)) {
> > + pr_debug_ratelimited(DRV_PFX "Invalid indirect operation (%u)\n",
> > + req->u.indirect.indirect_op);
> > + rc = -EINVAL;
> > + goto response;
> > + }
> >
> > + n_grants = req->u.indirect.nr_segments;
> > + if (n_grants > (MAX_INDIRECT_PAGES * GREFS_PER_INDIRECT_FRAME)) {
> > + pr_debug_ratelimited(DRV_PFX "Tried to unmap too many grefs: %u limit: %lu\n",
> > + n_grants, (MAX_INDIRECT_PAGES * GREFS_PER_INDIRECT_FRAME));
> > + rc = -EINVAL;
> > + goto response;
> > + }
> > +
> > + n_pages = INDIRECT_PAGES(n_grants, GREFS_PER_INDIRECT_FRAME);
> > + BUG_ON(n_pages > MAX_INDIRECT_PAGES);
> > + for (i = 0; i < n_pages; i++) {
> > + pages[i]->gref = req->u.indirect.indirect_grefs[i];
> > + }
> > +
> > + rc = xen_blkbk_map(blkif, pages, n_pages, true);
> > + if (rc)
> > + goto unmap;
> > +
> > + for (n = 0, i = 0; n < n_grants; n++) {
> > + if ((n % GREFS_PER_INDIRECT_FRAME) == 0) {
> > + /* Map indirect segments */
> > + if (grefs)
> > + kunmap_atomic(grefs);
> > + grefs = kmap_atomic(pages[n/GREFS_PER_INDIRECT_FRAME]->page);
> > + }
> > + i = n % GREFS_PER_INDIRECT_FRAME;
> > +
> > + persistent_gnt = get_persistent_gnt(blkif, grefs[i]);
> > + if (!persistent_gnt)
> > + continue;
> > +
> > + rb_erase(&persistent_gnt->node, &blkif->persistent_gnts);
> > + blkif->persistent_gnt_c--;
> > + atomic_dec(&blkif->persistent_gnt_in_use);
> > +
> > + gnttab_set_unmap_op(&unmap[segs_to_unmap],
> > + vaddr(persistent_gnt->page),
> > + GNTMAP_host_map,
> > + persistent_gnt->handle);
> > +
> > + unmap_pages[segs_to_unmap] = persistent_gnt->page;
> > +
> > + if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
> > + rc = gnttab_unmap_refs(unmap, NULL, unmap_pages,
> > + segs_to_unmap);
> > + BUG_ON(rc);
> > + put_free_pages(blkif, unmap_pages, segs_to_unmap);
> > + segs_to_unmap = 0;
> > + }
> > + kfree(persistent_gnt);
> > + }
> > +
> > + if (grefs)
> > + kunmap_atomic(grefs);
> > + if (segs_to_unmap > 0) {
> > + rc = gnttab_unmap_refs(unmap, NULL, unmap_pages, segs_to_unmap);
> > + BUG_ON(rc);
> > + put_free_pages(blkif, unmap_pages, segs_to_unmap);
> > + }
> > + rc = 0;
> > +
> > +unmap:
> > + xen_blkbk_unmap(blkif, pages, n_pages);
> > +response:
> > + make_response(blkif, req->u.indirect.id, req->u.indirect.indirect_op,
> > + rc ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY);
> > + free_req(blkif, pending_req);
> > +
> > + return rc;
> > +}
> >
> > /*
> > * Put a response on the ring on how the operation fared.
> > diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
> > index 8d88075..707a2f1 100644
> > --- a/drivers/block/xen-blkback/common.h
> > +++ b/drivers/block/xen-blkback/common.h
> > @@ -58,10 +58,12 @@
> >
> > #define SEGS_PER_INDIRECT_FRAME \
> > (PAGE_SIZE/sizeof(struct blkif_request_segment_aligned))
> > +#define GREFS_PER_INDIRECT_FRAME \
> > + (PAGE_SIZE/sizeof(grant_ref_t))
> > #define MAX_INDIRECT_PAGES \
> > ((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
> > -#define INDIRECT_PAGES(_segs) \
> > - ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
> > +#define INDIRECT_PAGES(_segs, _items_per_page) \
> > + ((_segs + _items_per_page - 1)/_items_per_page)
> >
> > /* Not a real protocol. Used to generate ring structs which contain
> > * the elements common to all protocols only. This way we get a
> > @@ -417,7 +419,8 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
> > dst->u.indirect.id = src->u.indirect.id;
> > dst->u.indirect.sector_number = src->u.indirect.sector_number;
> > barrier();
> > - j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments));
> > + j = min(MAX_INDIRECT_PAGES,
> > + INDIRECT_PAGES(dst->u.indirect.nr_segments, SEGS_PER_INDIRECT_FRAME));
> > for (i = 0; i < j; i++)
> > dst->u.indirect.indirect_grefs[i] =
> > src->u.indirect.indirect_grefs[i];
> > @@ -465,7 +468,8 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst,
> > dst->u.indirect.id = src->u.indirect.id;
> > dst->u.indirect.sector_number = src->u.indirect.sector_number;
> > barrier();
> > - j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments));
> > + j = min(MAX_INDIRECT_PAGES,
> > + INDIRECT_PAGES(dst->u.indirect.nr_segments, SEGS_PER_INDIRECT_FRAME));
> > for (i = 0; i < j; i++)
> > dst->u.indirect.indirect_grefs[i] =
> > src->u.indirect.indirect_grefs[i];
> > diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
> > index 2e5b69d..03829c6 100644
> > --- a/drivers/block/xen-blkback/xenbus.c
> > +++ b/drivers/block/xen-blkback/xenbus.c
> > @@ -759,6 +759,16 @@ again:
> > dev_warn(&dev->dev, "writing %s/feature-max-indirect-segments (%d)",
> > dev->nodename, err);
> >
> > + /*
> > + * Since we are going to use the same array to store indirect frames
> > + * we need to calculate how many grefs we can handle based on that
> > + */
> > + err = xenbus_printf(xbt, dev->nodename, "feature-max-unmap-grefs", "%lu",
> > + MAX_INDIRECT_PAGES * GREFS_PER_INDIRECT_FRAME);
> > + if (err)
> > + dev_warn(&dev->dev, "writing %s/feature-max-unmap-grefs (%d)",
> > + dev->nodename, err);
> > +
> > err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
> > (unsigned long long)vbd_sz(&be->blkif->vbd));
> > if (err) {
> > diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> > index 3d445c0..c4209f1 100644
> > --- a/drivers/block/xen-blkfront.c
> > +++ b/drivers/block/xen-blkfront.c
> > @@ -45,6 +45,9 @@
> > #include <linux/scatterlist.h>
> > #include <linux/bitmap.h>
> > #include <linux/list.h>
> > +#include <linux/kthread.h>
> > +#include <linux/freezer.h>
> > +#include <linux/delay.h>
> >
> > #include <xen/xen.h>
> > #include <xen/xenbus.h>
> > @@ -77,6 +80,7 @@ struct blk_shadow {
> > struct grant **grants_used;
> > struct grant **indirect_grants;
> > struct scatterlist *sg;
> > + bool in_use;
> > };
> >
> > struct split_bio {
> > @@ -106,6 +110,9 @@ MODULE_PARM_DESC(max, "Maximum amount of segments in indirect requests (default
> > */
> > #define MIN_FREE_GRANTS 512
> >
> > +/* Time between requests to unmap persistent grants (in ms) */
> > +#define LRU_PURGE_INTERVAL 1000
> > +
> > /*
> > * We have one of these per vbd, whether ide, scsi or 'other'. They
> > * hang in private_data off the gendisk structure. We may end up
> > @@ -128,6 +135,7 @@ struct blkfront_info
> > struct gnttab_free_callback callback;
> > struct blk_shadow shadow[BLK_RING_SIZE];
> > struct list_head persistent_gnts;
> > + struct list_head purge_gnts;
> > unsigned int persistent_gnts_c;
> > unsigned long shadow_free;
> > unsigned int feature_flush;
> > @@ -138,7 +146,9 @@ struct blkfront_info
> > unsigned int discard_alignment;
> > unsigned int feature_persistent:1;
> > unsigned int max_indirect_segments;
> > + unsigned int max_unmap_grefs;
> > int is_ready;
> > + struct task_struct *purge_thread;
> > };
> >
> > static unsigned int nr_minors;
> > @@ -168,17 +178,31 @@ static DEFINE_SPINLOCK(minor_lock);
> >
> > #define SEGS_PER_INDIRECT_FRAME \
> > (PAGE_SIZE/sizeof(struct blkif_request_segment_aligned))
> > -#define INDIRECT_GREFS(_segs) \
> > - ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
> > +#define GREFS_PER_INDIRECT_FRAME \
> > + (PAGE_SIZE/sizeof(grant_ref_t))
> > +#define INDIRECT_GREFS(_segs, _items_per_page) \
> > + ((_segs + _items_per_page - 1)/_items_per_page)
> >
> > static int blkfront_setup_indirect(struct blkfront_info *info);
> >
> > +static inline void flush_requests(struct blkfront_info *info)
> > +{
> > + int notify;
> > +
> > + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
> > +
> > + if (notify)
> > + notify_remote_via_irq(info->irq);
> > +}
> > +
> > static int get_id_from_freelist(struct blkfront_info *info)
> > {
> > unsigned long free = info->shadow_free;
> > BUG_ON(free >= BLK_RING_SIZE);
> > + BUG_ON(info->shadow[free].in_use);
> > info->shadow_free = info->shadow[free].req.u.rw.id;
> > info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
> > + info->shadow[free].in_use = true;
> > return free;
> > }
> >
> > @@ -187,8 +211,9 @@ static int add_id_to_freelist(struct blkfront_info *info,
> > {
> > if (info->shadow[id].req.u.rw.id != id)
> > return -EINVAL;
> > - if (info->shadow[id].request == NULL)
> > + if (!info->shadow[id].in_use)
> > return -EINVAL;
> > + info->shadow[id].in_use = false;
> > info->shadow[id].req.u.rw.id = info->shadow_free;
> > info->shadow[id].request = NULL;
> > info->shadow_free = id;
> > @@ -258,6 +283,88 @@ static struct grant *get_grant(grant_ref_t *gref_head,
> > return gnt_list_entry;
> > }
> >
> > +static int blkif_purge_grants(void *arg)
> > +{
> > + struct blkfront_info *info = arg;
> > + grant_ref_t *grefs = NULL;
> > + struct grant *gnt, *n;
> > + struct blkif_request *ring_req;
> > + unsigned long id;
> > + int num, i, n_pages;
> > +
> > + n_pages = INDIRECT_GREFS(info->max_unmap_grefs, GREFS_PER_INDIRECT_FRAME);
> > +
> > + while (!kthread_should_stop()) {
> > + if (try_to_freeze())
> > + continue;
> > +
> > + if (msleep_interruptible(LRU_PURGE_INTERVAL))
> > + continue;
> > +
> > + spin_lock_irq(&info->io_lock);
> > + if (unlikely(info->connected != BLKIF_STATE_CONNECTED) ||
> > + RING_FULL(&info->ring) ||
> > + (info->max_unmap_grefs + n_pages) > info->persistent_gnts_c ||
> > + !list_empty(&info->purge_gnts)) {
> > + spin_unlock_irq(&info->io_lock);
> > + continue;
> > + }
> > +
> > + /* Set up the ring request */
> > + ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
> > + id = get_id_from_freelist(info);
> > + ring_req->operation = BLKIF_OP_INDIRECT;
> > + ring_req->u.indirect.indirect_op = BLKIF_OP_UNMAP;
> > + ring_req->u.indirect.handle = info->handle;
> > + ring_req->u.indirect.id = id;
> > +
> > + for (i = 0; i < n_pages; i++)
> > + info->shadow[id].indirect_grants[i] = NULL;
> > +
> > + num = 0;
> > + BUG_ON(grefs != NULL);
> > + list_for_each_entry_safe_reverse(gnt, n, &info->persistent_gnts, node) {
> > + if (gnt->gref == GRANT_INVALID_REF)
> > + continue;
> > +
> > + i = num / GREFS_PER_INDIRECT_FRAME;
> > + if (((num % GREFS_PER_INDIRECT_FRAME) == 0) &&
> > + (info->shadow[id].indirect_grants[i] == NULL)) {
> > + if (grefs)
> > + kunmap_atomic(grefs);
> > + list_del(&gnt->node);
> > + info->persistent_gnts_c--;
> > + info->shadow[id].indirect_grants[i] = gnt;
> > + grefs = kmap_atomic(pfn_to_page(gnt->pfn));
> > + ring_req->u.indirect.indirect_grefs[i] = gnt->gref;
> > + continue;
> > + }
> > +
> > + list_del(&gnt->node);
> > + list_add(&gnt->node, &info->purge_gnts);
> > + info->persistent_gnts_c--;
> > +
> > + i = num % GREFS_PER_INDIRECT_FRAME;
> > + grefs[i] = gnt->gref;
> > +
> > + if (++num == info->max_unmap_grefs)
> > + break;
> > + }
> > + if (grefs) {
> > + kunmap_atomic(grefs);
> > + grefs = NULL;
> > + }
> > +
> > + ring_req->u.indirect.nr_segments = num;
> > + info->ring.req_prod_pvt++;
> > + info->shadow[id].req = *ring_req;
> > + flush_requests(info);
> > +
> > + spin_unlock_irq(&info->io_lock);
> > + }
> > + return 0;
> > +}
> > +
> > static const char *op_name(int op)
> > {
> > static const char *const names[] = {
> > @@ -265,7 +372,9 @@ static const char *op_name(int op)
> > [BLKIF_OP_WRITE] = "write",
> > [BLKIF_OP_WRITE_BARRIER] = "barrier",
> > [BLKIF_OP_FLUSH_DISKCACHE] = "flush",
> > - [BLKIF_OP_DISCARD] = "discard" };
> > + [BLKIF_OP_DISCARD] = "discard",
> > + [BLKIF_OP_INDIRECT] = "indirect",
> > + [BLKIF_OP_UNMAP] = "unmap", };
> >
> > if (op < 0 || op >= ARRAY_SIZE(names))
> > return "unknown";
> > @@ -412,7 +521,7 @@ static int blkif_queue_request(struct request *req)
> > * If we are using indirect segments we need to account
> > * for the indirect grefs used in the request.
> > */
> > - max_grefs += INDIRECT_GREFS(req->nr_phys_segments);
> > + max_grefs += INDIRECT_GREFS(req->nr_phys_segments, SEGS_PER_INDIRECT_FRAME);
> >
> > /* Check if we have enough grants to allocate a requests */
> > if (info->persistent_gnts_c < max_grefs) {
> > @@ -556,17 +665,6 @@ static int blkif_queue_request(struct request *req)
> > return 0;
> > }
> >
> > -
> > -static inline void flush_requests(struct blkfront_info *info)
> > -{
> > - int notify;
> > -
> > - RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
> > -
> > - if (notify)
> > - notify_remote_via_irq(info->irq);
> > -}
> > -
> > /*
> > * do_blkif_request
> > * read a block; request is in a request queue
> > @@ -904,6 +1002,12 @@ static void blkif_free(struct blkfront_info *info, int suspend)
> > struct grant *n;
> > int i, j, segs;
> >
> > + /* Remove purge_thread */
> > + if (info->purge_thread) {
> > + kthread_stop(info->purge_thread);
> > + info->purge_thread = NULL;
> > + }
> > +
> > /* Prevent new requests being issued until we fix things up. */
> > spin_lock_irq(&info->io_lock);
> > info->connected = suspend ?
> > @@ -928,17 +1032,38 @@ static void blkif_free(struct blkfront_info *info, int suspend)
> > }
> > BUG_ON(info->persistent_gnts_c != 0);
> >
> > + /* Remove grants waiting for purge */
> > + if (!list_empty(&info->purge_gnts)) {
> > + list_for_each_entry_safe(persistent_gnt, n,
> > + &info->purge_gnts, node) {
> > + list_del(&persistent_gnt->node);
> > + BUG_ON(persistent_gnt->gref == GRANT_INVALID_REF);
> > + gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
> > + __free_page(pfn_to_page(persistent_gnt->pfn));
> > + kfree(persistent_gnt);
> > + }
> > + }
> > +
> > for (i = 0; i < BLK_RING_SIZE; i++) {
> > + int n_pages;
> > /*
> > * Clear persistent grants present in requests already
> > * on the shared ring
> > */
> > - if (!info->shadow[i].request)
> > + if (!info->shadow[i].in_use)
> > goto free_shadow;
> >
> > segs = info->shadow[i].req.operation == BLKIF_OP_INDIRECT ?
> > info->shadow[i].req.u.indirect.nr_segments :
> > info->shadow[i].req.u.rw.nr_segments;
> > +
> > + if (info->shadow[i].req.operation == BLKIF_OP_INDIRECT &&
> > + info->shadow[i].req.u.indirect.indirect_op == BLKIF_OP_UNMAP) {
> > + n_pages = INDIRECT_GREFS(segs, GREFS_PER_INDIRECT_FRAME);
> > + segs = 0;
> > + } else
> > + n_pages = INDIRECT_GREFS(segs, SEGS_PER_INDIRECT_FRAME);
> > +
> > for (j = 0; j < segs; j++) {
> > persistent_gnt = info->shadow[i].grants_used[j];
> > gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
> > @@ -953,7 +1078,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
> > */
> > goto free_shadow;
> >
> > - for (j = 0; j < INDIRECT_GREFS(segs); j++) {
> > + for (j = 0; j < n_pages; j++) {
> > persistent_gnt = info->shadow[i].indirect_grants[j];
> > gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
> > __free_page(pfn_to_page(persistent_gnt->pfn));
> > @@ -996,10 +1121,16 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
> > struct scatterlist *sg;
> > char *bvec_data;
> > void *shared_data;
> > - int nseg;
> > + int nseg, npages;
> >
> > nseg = s->req.operation == BLKIF_OP_INDIRECT ?
> > s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments;
> > + if (bret->operation == BLKIF_OP_UNMAP) {
> > + npages = INDIRECT_GREFS(nseg, GREFS_PER_INDIRECT_FRAME);
> > + nseg = 0;
> > + } else {
> > + npages = INDIRECT_GREFS(nseg, SEGS_PER_INDIRECT_FRAME);
> > + }
> >
> > if (bret->operation == BLKIF_OP_READ) {
> > /*
> > @@ -1026,7 +1157,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
> > info->persistent_gnts_c++;
> > }
> > if (s->req.operation == BLKIF_OP_INDIRECT) {
> > - for (i = 0; i < INDIRECT_GREFS(nseg); i++) {
> > + for (i = 0; i < npages; i++) {
> > list_add(&s->indirect_grants[i]->node, &info->persistent_gnts);
> > info->persistent_gnts_c++;
> > }
> > @@ -1041,6 +1172,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
> > unsigned long flags;
> > struct blkfront_info *info = (struct blkfront_info *)dev_id;
> > int error;
> > + struct grant *gnt, *n;
> >
> > spin_lock_irqsave(&info->io_lock, flags);
> >
> > @@ -1125,6 +1257,22 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
> >
> > __blk_end_request_all(req, error);
> > break;
> > + case BLKIF_OP_UNMAP:
> > + /* Remove access to the grants and add them back to the list */
> > + if (unlikely(bret->status != BLKIF_RSP_OKAY))
> > + pr_warn_ratelimited("blkfront: unmap operation returned !BLKIF_RSP_OKAY");
> > + list_for_each_entry_safe(gnt, n, &info->purge_gnts, node) {
> > + list_del(&gnt->node);
> > + if (bret->status == BLKIF_RSP_OKAY) {
> > + gnttab_end_foreign_access(gnt->gref, 0, 0UL);
> > + gnt->gref = GRANT_INVALID_REF;
> > + list_add_tail(&gnt->node, &info->persistent_gnts);
> > + } else {
> > + list_add(&gnt->node, &info->persistent_gnts);
> > + info->persistent_gnts_c++;
> > + }
> > + }
> > + break;
> > default:
> > BUG();
> > }
> > @@ -1323,6 +1471,7 @@ static int blkfront_probe(struct xenbus_device *dev,
> > info->xbdev = dev;
> > info->vdevice = vdevice;
> > INIT_LIST_HEAD(&info->persistent_gnts);
> > + INIT_LIST_HEAD(&info->purge_gnts);
> > info->persistent_gnts_c = 0;
> > info->connected = BLKIF_STATE_DISCONNECTED;
> > INIT_WORK(&info->work, blkif_restart_queue);
> > @@ -1650,7 +1799,7 @@ static void blkfront_setup_discard(struct blkfront_info *info)
> >
> > static int blkfront_setup_indirect(struct blkfront_info *info)
> > {
> > - unsigned int indirect_segments, segs;
> > + unsigned int indirect_segments, segs, indirect_unmap;
> > int err, i;
> >
> > err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
> > @@ -1665,7 +1814,24 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
> > segs = info->max_indirect_segments;
> > }
> >
> > - err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE);
> > + /* Check if backend supports unmapping persistent grants */
> > + err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
> > + "feature-max-unmap-grefs", "%u", &indirect_unmap,
> > + NULL);
> > + if (err)
> > + info->max_unmap_grefs = 0;
> > + else
> > + /*
> > + * Since we don't allocate grants dynamically we need to make
> > + * sure that the unmap operation doesn't use more grants than
> > + * a normal indirect operation, or we might exhaust the buffer
> > + * of pre-allocated grants
> > + */
> > + info->max_unmap_grefs = min(segs, indirect_unmap);
> > +
> > +
> > + err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs, SEGS_PER_INDIRECT_FRAME)) *
> > + BLK_RING_SIZE);
> > if (err)
> > goto out_of_memory;
> >
> > @@ -1677,7 +1843,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
> > if (info->max_indirect_segments)
> > info->shadow[i].indirect_grants = kzalloc(
> > sizeof(info->shadow[i].indirect_grants[0]) *
> > - INDIRECT_GREFS(segs),
> > + INDIRECT_GREFS(segs, SEGS_PER_INDIRECT_FRAME),
> > GFP_NOIO);
> > if ((info->shadow[i].grants_used == NULL) ||
> > (info->shadow[i].sg == NULL) ||
> > @@ -1687,6 +1853,17 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
> > sg_init_table(info->shadow[i].sg, segs);
> > }
> >
> > + if (info->max_unmap_grefs) {
> > + BUG_ON(info->purge_thread != NULL);
> > + info->purge_thread = kthread_run(blkif_purge_grants, info, "%s/purge",
> > + info->xbdev->nodename);
> > + if (IS_ERR(info->purge_thread)) {
> > + err = PTR_ERR(info->purge_thread);
> > + info->purge_thread = NULL;
> > + pr_alert("unable to start purge thread");
> > + return err;
> > + }
> > + }
> >
> > return 0;
> >
> > diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
> > index 65e1209..20fcbfd 100644
> > --- a/include/xen/interface/io/blkif.h
> > +++ b/include/xen/interface/io/blkif.h
> > @@ -127,6 +127,19 @@ typedef uint64_t blkif_sector_t;
> > #define BLKIF_OP_INDIRECT 6
> >
> > /*
> > + * Recognized if "feature-max-unmap-grefs" is present in the backend xenbus
> > + * info. The "feature-max-unmap-grefs" node contains the maximum number of grefs
> > + * allowed by the backend per request.
> > + *
> > + * This operation can only be used as an indirect operation. blkfront might use
> > + * this operation to request blkback to unmap certain grants, so they can be
> > + * freed by blkfront. The grants to unmap are placed inside the indirect pages
> > + * as an array of grant_ref_t, and nr_segments contains the number of grefs
> > + * to unmap.
> > + */
> > +#define BLKIF_OP_UNMAP 7
> > +
> > +/*
> > * Maximum scatter/gather segments per request.
> > * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE.
> > * NB. This could be 12 if the ring indexes weren't stored in the same page.
> > --
> > 1.7.7.5 (Apple Git-26)
> >
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxx
> http://lists.xen.org/xen-devel
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/