Re: [PATCH RFC 04/12] xen-blkfront: pre-allocate pages for requests

From: Konrad Rzeszutek Wilk
Date: Tue Mar 05 2013 - 09:19:23 EST


On Tue, Mar 05, 2013 at 12:04:41PM +0100, Roger Pau Monné wrote:
> On 04/03/13 20:39, Konrad Rzeszutek Wilk wrote:
> > On Thu, Feb 28, 2013 at 11:28:47AM +0100, Roger Pau Monne wrote:
> >> This prevents us from having to call alloc_page while we are preparing
> >> the request. Since blkfront was calling alloc_page with a spinlock
> >> held we used GFP_ATOMIC, which can fail if we are requesting a lot of
> >> pages since it is using the emergency memory pools.
> >>
> >> Allocating all the pages at init prevents us from having to call
> >> alloc_page, thus preventing possible failures.
> >>
> >> Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
> >> Cc: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
> >> Cc: xen-devel@xxxxxxxxxxxxx
> >> ---
> >> drivers/block/xen-blkfront.c | 120 +++++++++++++++++++++++++++--------------
> >> 1 files changed, 79 insertions(+), 41 deletions(-)
> >>
> >> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> >> index 2e39eaf..5ba6b87 100644
> >> --- a/drivers/block/xen-blkfront.c
> >> +++ b/drivers/block/xen-blkfront.c
> >> @@ -165,6 +165,69 @@ static int add_id_to_freelist(struct blkfront_info *info,
> >> return 0;
> >> }
> >>
> >> +static int fill_grant_buffer(struct blkfront_info *info, int num)
> >> +{
> >> + struct page *granted_page;
> >> + struct grant *gnt_list_entry, *n;
> >> + int i = 0;
> >> +
> >> + while(i < num) {
> >> + gnt_list_entry = kzalloc(sizeof(struct grant), GFP_NOIO);
> >
> > GFP_NORMAL ?
>
> drivers/block/xen-blkfront.c:175: error: ‘GFP_NORMAL’ undeclared (first
> use in this function)
>
> Did you mean GFP_KERNEL? I think GFP_NOIO is more suitable, it can block
> but no IO will be performed.

<sigh> I meant GFP_KERNEL. Sorry about the incorrect advice. The GFP_KERNEL
is the more general purpose pool - is there a good reason to use _NOIO?
This is after all during initialization when there is no IO using this driver.

>
> >
> >> + if (!gnt_list_entry)
> >> + goto out_of_memory;
> >
> > Hmm, I guess another patch could be to convert this to a fail-safe
> > mechanism. Meaning if we fail here, we just cap our maximum amount of
> > grants we have up to 'i'.
> >
> >
> >> +
> >> + granted_page = alloc_page(GFP_NOIO);
> >
> > GFP_NORMAL

GFP_KERNEL of course.
> >
> >> + if (!granted_page) {
> >> + kfree(gnt_list_entry);
> >> + goto out_of_memory;
> >> + }
> >> +
> >> + gnt_list_entry->pfn = page_to_pfn(granted_page);
> >> + gnt_list_entry->gref = GRANT_INVALID_REF;
> >> + list_add(&gnt_list_entry->node, &info->persistent_gnts);
> >> + i++;
> >> + }
> >> +
> >> + return 0;
> >> +
> >> +out_of_memory:
> >> + list_for_each_entry_safe(gnt_list_entry, n,
> >> + &info->persistent_gnts, node) {
> >> + list_del(&gnt_list_entry->node);
> >> + __free_page(pfn_to_page(gnt_list_entry->pfn));
> >> + kfree(gnt_list_entry);
> >> + i--;
> >> + }
> >> + BUG_ON(i != 0);
> >> + return -ENOMEM;
> >> +}
> >> +
> >> +static struct grant *get_grant(grant_ref_t *gref_head,
> >> + struct blkfront_info *info)
> >> +{
> >> + struct grant *gnt_list_entry;
> >> + unsigned long buffer_mfn;
> >> +
> >> + BUG_ON(list_empty(&info->persistent_gnts));
> >> + gnt_list_entry = list_first_entry(&info->persistent_gnts, struct grant,
> >> + node);
> >> + list_del(&gnt_list_entry->node);
> >> +
> >> + if (gnt_list_entry->gref != GRANT_INVALID_REF) {
> >> + info->persistent_gnts_c--;
> >> + return gnt_list_entry;
> >> + }
> >> +
> >> + /* Assign a gref to this page */
> >> + gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
> >> + BUG_ON(gnt_list_entry->gref == -ENOSPC);
> >> + buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
> >> + gnttab_grant_foreign_access_ref(gnt_list_entry->gref,
> >> + info->xbdev->otherend_id,
> >> + buffer_mfn, 0);
> >> + return gnt_list_entry;
> >> +}
> >> +
> >> static const char *op_name(int op)
> >> {
> >> static const char *const names[] = {
> >> @@ -306,7 +369,6 @@ static int blkif_queue_request(struct request *req)
> >> */
> >> bool new_persistent_gnts;
> >> grant_ref_t gref_head;
> >> - struct page *granted_page;
> >> struct grant *gnt_list_entry = NULL;
> >> struct scatterlist *sg;
> >>
> >> @@ -370,42 +432,9 @@ static int blkif_queue_request(struct request *req)
> >> fsect = sg->offset >> 9;
> >> lsect = fsect + (sg->length >> 9) - 1;
> >>
> >> - if (info->persistent_gnts_c) {
> >> - BUG_ON(list_empty(&info->persistent_gnts));
> >> - gnt_list_entry = list_first_entry(
> >> - &info->persistent_gnts,
> >> - struct grant, node);
> >> - list_del(&gnt_list_entry->node);
> >> -
> >> - ref = gnt_list_entry->gref;
> >> - buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
> >> - info->persistent_gnts_c--;
> >> - } else {
> >> - ref = gnttab_claim_grant_reference(&gref_head);
> >> - BUG_ON(ref == -ENOSPC);
> >> -
> >> - gnt_list_entry =
> >> - kmalloc(sizeof(struct grant),
> >> - GFP_ATOMIC);
> >> - if (!gnt_list_entry)
> >> - return -ENOMEM;
> >> -
> >> - granted_page = alloc_page(GFP_ATOMIC);
> >> - if (!granted_page) {
> >> - kfree(gnt_list_entry);
> >> - return -ENOMEM;
> >> - }
> >> -
> >> - gnt_list_entry->pfn =
> >> - page_to_pfn(granted_page);
> >> - gnt_list_entry->gref = ref;
> >> -
> >> - buffer_mfn = pfn_to_mfn(page_to_pfn(
> >> - granted_page));
> >> - gnttab_grant_foreign_access_ref(ref,
> >> - info->xbdev->otherend_id,
> >> - buffer_mfn, 0);
> >> - }
> >> + gnt_list_entry = get_grant(&gref_head, info);
> >> + ref = gnt_list_entry->gref;
> >> + buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
> >>
> >> info->shadow[id].grants_used[i] = gnt_list_entry;
> >>
> >> @@ -803,17 +832,20 @@ static void blkif_free(struct blkfront_info *info, int suspend)
> >> blk_stop_queue(info->rq);
> >>
> >> /* Remove all persistent grants */
> >> - if (info->persistent_gnts_c) {
> >> + if (!list_empty(&info->persistent_gnts)) {
> >> list_for_each_entry_safe(persistent_gnt, n,
> >> &info->persistent_gnts, node) {
> >> list_del(&persistent_gnt->node);
> >> - gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
> >> + if (persistent_gnt->gref != GRANT_INVALID_REF) {
> >> + gnttab_end_foreign_access(persistent_gnt->gref,
> >> + 0, 0UL);
> >> + info->persistent_gnts_c--;
> >> + }
> >> __free_page(pfn_to_page(persistent_gnt->pfn));
> >> kfree(persistent_gnt);
> >> - info->persistent_gnts_c--;
> >> }
> >> - BUG_ON(info->persistent_gnts_c != 0);
> >> }
> >> + BUG_ON(info->persistent_gnts_c != 0);
> >
> > So if the guest _never_ sent any I/Os and just attached/detached the device - won't
> > we fail here?.
>
> persistent_gnts_c is initialized to 0, so if we don't perform IO it
> should still be 0 at this point. Since we have just cleaned the
> persistent grants lists this should always be 0 at this point.

OK.
>
> >>
> >> /* No more gnttab callback work. */
> >> gnttab_cancel_free_callback(&info->callback);
> >> @@ -1088,6 +1120,12 @@ again:
> >> goto destroy_blkring;
> >> }
> >>
> >> + /* Allocate memory for grants */
> >> + err = fill_grant_buffer(info, BLK_RING_SIZE *
> >> + BLKIF_MAX_SEGMENTS_PER_REQUEST);
> >> + if (err)
> >> + goto out;
> >
> > That looks to be in the wrong function - talk_to_blkback function is
> > to talk to the blkback. Not do initialization type operations.
>
> Yes, I know it's not the best place to place it. It's here mainly
> because that's the only function that gets called by both driver
> initialization and resume.
>
> Last patch moves this to a more sensible place.

Lets make it part of this patch from the start. We still have two
months of time before the next merge window opens - so we have
time to make it nice and clean.

>
> >
> > Also I think this means that on resume - we would try to allocate
> > again the grants?
>
> Yes, grants are cleaned on resume and reallocated.
>
> >> +
> >> xenbus_switch_state(dev, XenbusStateInitialised);
> >>
> >> return 0;
> >> --
> >> 1.7.7.5 (Apple Git-26)
> >>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/