Re: [PATCH v2 10/13] xen/pvcalls: implement poll command
From: Stefano Stabellini
Date: Wed Jul 26 2017 - 20:21:16 EST
On Wed, 26 Jul 2017, Boris Ostrovsky wrote:
> On 07/25/2017 05:22 PM, Stefano Stabellini wrote:
> > For active sockets, check the indexes and use the inflight_conn_req
> > waitqueue to wait.
> >
> > For passive sockets, send PVCALLS_POLL to the backend. Use the
> > inflight_accept_req waitqueue if an accept is outstanding. Otherwise use
> > the inflight_req waitqueue: inflight_req is woken up when a new response
> > is received; on wakeup we check whether the POLL response has arrived by
> > looking at the PVCALLS_FLAG_POLL_RET flag. We set the flag from
> > pvcalls_front_event_handler, if the response was for a POLL command.
> >
> > In pvcalls_front_event_handler, get the struct socket pointer from the
> > poll id (we previously converted struct socket* to uint64_t and used it
> > as id).
> >
> > Signed-off-by: Stefano Stabellini <stefano@xxxxxxxxxxx>
> > CC: boris.ostrovsky@xxxxxxxxxx
> > CC: jgross@xxxxxxxx
> > ---
> > drivers/xen/pvcalls-front.c | 134 ++++++++++++++++++++++++++++++++++++++++----
> > drivers/xen/pvcalls-front.h | 3 +
> > 2 files changed, 126 insertions(+), 11 deletions(-)
> >
> > diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> > index b4ca569..833b717 100644
> > --- a/drivers/xen/pvcalls-front.c
> > +++ b/drivers/xen/pvcalls-front.c
> > @@ -130,17 +130,35 @@ static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
> > rsp = RING_GET_RESPONSE(&bedata->ring, bedata->ring.rsp_cons);
> >
> > req_id = rsp->req_id;
> > - src = (uint8_t *)&bedata->rsp[req_id];
> > - src += sizeof(rsp->req_id);
> > - dst = (uint8_t *)rsp;
> > - dst += sizeof(rsp->req_id);
> > - memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
> > - /*
> > - * First copy the rest of the data, then req_id. It is
> > - * paired with the barrier when accessing bedata->rsp.
> > - */
> > - smp_wmb();
> > - WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);
> > + if (rsp->cmd == PVCALLS_POLL) {
> > + struct socket *sock = (struct socket *) rsp->u.poll.id;
> > + struct sock_mapping *map =
> > + (struct sock_mapping *)
> > + READ_ONCE(sock->sk->sk_send_head);
> > +
> > + set_bit(PVCALLS_FLAG_POLL_RET,
> > + (void *)&map->passive.flags);
> > + /*
> > + * Set RET, then clear INFLIGHT. It pairs with
> > + * the checks at the beginning of
> > + * pvcalls_front_poll_passive.
> > + */
> > + smp_wmb();
>
> virt_wmb() (here, below, and I think I saw it somewhere else)
This smp_wmb() is a barrier internal to the frontend: it doesn't
synchronize with the backend, only between CPUs within the frontend.
A uniprocessor frontend wouldn't actually need this barrier. I admit it
is a bit confusing; I'll add a comment about this.
> > + clear_bit(PVCALLS_FLAG_POLL_INFLIGHT,
> > + (void *)&map->passive.flags);
> > + } else {
> > + src = (uint8_t *)&bedata->rsp[req_id];
> > + src += sizeof(rsp->req_id);
> > + dst = (uint8_t *)rsp;
> > + dst += sizeof(rsp->req_id);
> > + memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
> > + /*
> > + * First copy the rest of the data, then req_id. It is
> > + * paired with the barrier when accessing bedata->rsp.
> > + */
> > + smp_wmb();
> > + WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);
> > + }
> >
> > done = 1;
> > bedata->ring.rsp_cons++;
> > @@ -707,6 +725,100 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
> > return ret;
> > }
> >
> > +static unsigned int pvcalls_front_poll_passive(struct file *file,
> > + struct pvcalls_bedata *bedata,
> > + struct sock_mapping *map,
> > + poll_table *wait)
> > +{
> > + int notify, req_id;
> > + struct xen_pvcalls_request *req;
> > +
> > + if (test_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
> > + (void *)&map->passive.flags)) {
> > + poll_wait(file, &map->passive.inflight_accept_req, wait);
> > + return 0;
> > + }
> > +
> > + if (test_and_clear_bit(PVCALLS_FLAG_POLL_RET,
> > + (void *)&map->passive.flags))
> > + return POLLIN;
> > +
> > + /*
> > + * First check RET, then INFLIGHT. No barriers necessary to
> > + * ensure execution ordering because of the conditional
> > + * instructions creating control dependencies.
> > + */
> > +
> > + if (test_and_set_bit(PVCALLS_FLAG_POLL_INFLIGHT,
> > + (void *)&map->passive.flags)) {
> > + poll_wait(file, &bedata->inflight_req, wait);
> > + return 0;
> > + }
> > +
> > + spin_lock(&bedata->pvcallss_lock);
> > + req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
> > + if (RING_FULL(&bedata->ring) ||
> > + READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
> > + spin_unlock(&bedata->pvcallss_lock);
> > + return -EAGAIN;
> > + }
> > + req = RING_GET_REQUEST(&bedata->ring, req_id);
> > + req->req_id = req_id;
> > + req->cmd = PVCALLS_POLL;
> > + req->u.poll.id = (uint64_t) map->sock;
> > +
> > + bedata->ring.req_prod_pvt++;
> > + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
> > + spin_unlock(&bedata->pvcallss_lock);
> > + if (notify)
> > + notify_remote_via_irq(bedata->irq);
> > +
> > + poll_wait(file, &bedata->inflight_req, wait);
> > + return 0;
> > +}
> > +
> > +static unsigned int pvcalls_front_poll_active(struct file *file,
> > + struct pvcalls_bedata *bedata,
> > + struct sock_mapping *map,
> > + poll_table *wait)
> > +{
> > + unsigned int mask = 0;
> > + int32_t in_error, out_error;
> > + struct pvcalls_data_intf *intf = map->active.ring;
> > +
> > + out_error = intf->out_error;
> > + in_error = intf->in_error;
> > +
> > + poll_wait(file, &map->active.inflight_conn_req, wait);
> > + if (pvcalls_front_write_todo(map))
> > + mask |= POLLOUT | POLLWRNORM;
> > + if (pvcalls_front_read_todo(map))
> > + mask |= POLLIN | POLLRDNORM;
> > + if (in_error != 0 || out_error != 0)
> > + mask |= POLLERR;
> > +
> > + return mask;
> > +}
> > +
> > +unsigned int pvcalls_front_poll(struct file *file, struct socket *sock,
> > + poll_table *wait)
> > +{
> > + struct pvcalls_bedata *bedata;
> > + struct sock_mapping *map;
> > +
> > + if (!pvcalls_front_dev)
> > + return POLLNVAL;
> > + bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
> > +
> > + map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
> > + if (!map)
> > + return POLLNVAL;
> > + if (map->active_socket)
> > + return pvcalls_front_poll_active(file, bedata, map, wait);
> > + else
> > + return pvcalls_front_poll_passive(file, bedata, map, wait);
> > +}
>
> All other routines return an int (0 or error code). Shouldn't they all
> have the same return type? (In fact, where are all these routines called
> from?)
poll is the only one that differs because it is meant to be used to
implement the "poll" function, which is the only one to return unsigned
int instead of int. The caller is not part of this series, but take a
look at include/linux/net.h:struct proto_ops; it should give you a
pretty good idea.
> > static const struct xenbus_device_id pvcalls_front_ids[] = {
> > { "pvcalls" },
> > { "" }
> > diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
> > index de24041..25e05b8 100644
> > --- a/drivers/xen/pvcalls-front.h
> > +++ b/drivers/xen/pvcalls-front.h
> > @@ -20,5 +20,8 @@ int pvcalls_front_recvmsg(struct socket *sock,
> > struct msghdr *msg,
> > size_t len,
> > int flags);
> > +unsigned int pvcalls_front_poll(struct file *file,
> > + struct socket *sock,
> > + poll_table *wait);
> >
> > #endif
>