Re: [PATCH v3 05/13] xen/pvcalls: implement connect command

From: Boris Ostrovsky
Date: Sat Aug 12 2017 - 21:14:05 EST




On 07/31/2017 06:57 PM, Stefano Stabellini wrote:
Send PVCALLS_CONNECT to the backend. Allocate a new ring and evtchn for
the active socket.

Introduce fields in struct sock_mapping to keep track of active sockets.
Introduce a waitqueue to allow the frontend to wait on data coming from
the backend on the active socket (recvmsg command).

Two mutexes (one for reads and one for writes) will be used to protect
the active socket in and out rings from concurrent accesses.

Signed-off-by: Stefano Stabellini <stefano@xxxxxxxxxxx>
CC: boris.ostrovsky@xxxxxxxxxx
CC: jgross@xxxxxxxx
---
drivers/xen/pvcalls-front.c | 146 ++++++++++++++++++++++++++++++++++++++++++++
drivers/xen/pvcalls-front.h | 2 +
2 files changed, 148 insertions(+)

diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index 7c4a7cb..379b8fb 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -13,6 +13,10 @@
*/
#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/socket.h>
+
+#include <net/sock.h>
#include <xen/events.h>
#include <xen/grant_table.h>
@@ -44,6 +48,18 @@ struct sock_mapping {
bool active_socket;
struct list_head list;
struct socket *sock;
+ union {
+ struct {
+ int irq;
+ grant_ref_t ref;
+ struct pvcalls_data_intf *ring;
+ struct pvcalls_data data;
+ struct mutex in_mutex;
+ struct mutex out_mutex;
+
+ wait_queue_head_t inflight_conn_req;
+ } active;
+ };
};
static inline int get_request(struct pvcalls_bedata *bedata, int *req_id)
@@ -97,6 +113,18 @@ static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
return IRQ_HANDLED;
}
+static irqreturn_t pvcalls_front_conn_handler(int irq, void *sock_map)
+{
+ struct sock_mapping *map = sock_map;
+
+ if (map == NULL)
+ return IRQ_HANDLED;
+
+ wake_up_interruptible(&map->active.inflight_conn_req);
+
+ return IRQ_HANDLED;
+}
+
int pvcalls_front_socket(struct socket *sock)
{
struct pvcalls_bedata *bedata;
@@ -162,6 +190,124 @@ int pvcalls_front_socket(struct socket *sock)
return ret;
}
+static int create_active(struct sock_mapping *map, int *evtchn)
+{
+ void *bytes;
+ int ret = -ENOMEM, irq = -1, i;
+
+ init_waitqueue_head(&map->active.inflight_conn_req);
+
+ map->active.ring = (struct pvcalls_data_intf *)
+ __get_free_page(GFP_KERNEL | __GFP_ZERO);
+ if (map->active.ring == NULL)
+ goto out_error;
+ memset(map->active.ring, 0, XEN_PAGE_SIZE);


The memset() is not needed (the page is allocated with __GFP_ZERO, so it is already zeroed).

BTW, are you operating on XEN_PAGE_SIZE or PAGE_SIZE? Because if it's the former then __get_free_page() may be way more than what you need. (below too)


+ map->active.ring->ring_order = PVCALLS_RING_ORDER;
+ bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ map->active.ring->ring_order);
+ if (bytes == NULL)
+ goto out_error;
+ for (i = 0; i < (1 << map->active.ring->ring_order); i++)

Use PVCALLS_RING_ORDER here.


+ map->active.ring->ref[i] = gnttab_grant_foreign_access(
+ pvcalls_front_dev->otherend_id,
+ pfn_to_gfn(virt_to_pfn(bytes) + i), 0);
+
+ map->active.ref = gnttab_grant_foreign_access(
+ pvcalls_front_dev->otherend_id,
+ pfn_to_gfn(virt_to_pfn((void *)map->active.ring)), 0);
+
+ map->active.data.in = bytes;
+ map->active.data.out = bytes +
+ XEN_FLEX_RING_SIZE(map->active.ring->ring_order);
+
+ ret = xenbus_alloc_evtchn(pvcalls_front_dev, evtchn);
+ if (ret)
+ goto out_error;
+ irq = bind_evtchn_to_irqhandler(*evtchn, pvcalls_front_conn_handler,
+ 0, "pvcalls-frontend", map);
+ if (irq < 0) {
+ ret = irq;
+ goto out_error;
+ }
+
+ map->active.irq = irq;
+ map->active_socket = true;
+ mutex_init(&map->active.in_mutex);
+ mutex_init(&map->active.out_mutex);
+
+ return 0;
+
+out_error:
+ if (irq >= 0)
+ unbind_from_irqhandler(irq, map);
+ else if (*evtchn >= 0)

*evtchn may have been passed in as >=0. You probably want to set it to -1 or something in the beginning.

+ xenbus_free_evtchn(pvcalls_front_dev, *evtchn);
+ kfree(map->active.data.in);
+ kfree(map->active.ring);
+ kfree(map);

Probably a matter of personal style but I think it's better to free map in the caller.

Actually, should it be freed? The caller gets an error if get_request() or create_active() fails. In the first case map is not freed. Is the caller going to distinguish EAGAIN from any other error and know that map does not need to be allocated?

(Also I think you need to unlink the map from socket_mapping list before freeing it).

-boris

+ return ret;
+}
+
+int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
+ int addr_len, int flags)
+{
+ struct pvcalls_bedata *bedata;
+ struct sock_mapping *map = NULL;
+ struct xen_pvcalls_request *req;
+ int notify, req_id, ret, evtchn;
+
+ if (!pvcalls_front_dev)
+ return -ENETUNREACH;
+ if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM)
+ return -ENOTSUPP;
+
+ bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
+
+ map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
+ if (!map)
+ return -EINVAL;
+
+ spin_lock(&bedata->pvcallss_lock);
+ ret = get_request(bedata, &req_id);
+ if (ret < 0) {
+ spin_unlock(&bedata->pvcallss_lock);
+ return ret;
+ }
+ ret = create_active(map, &evtchn);
+ if (ret < 0) {
+ spin_unlock(&bedata->pvcallss_lock);
+ return ret;
+ }
+
+ req = RING_GET_REQUEST(&bedata->ring, req_id);
+ req->req_id = req_id;
+ req->cmd = PVCALLS_CONNECT;
+ req->u.connect.id = (uint64_t)map;
+ memcpy(req->u.connect.addr, addr, sizeof(*addr));
+ req->u.connect.len = addr_len;
+ req->u.connect.flags = flags;
+ req->u.connect.ref = map->active.ref;
+ req->u.connect.evtchn = evtchn;
+
+ map->sock = sock;
+
+ bedata->ring.req_prod_pvt++;
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
+ spin_unlock(&bedata->pvcallss_lock);
+
+ if (notify)
+ notify_remote_via_irq(bedata->irq);
+
+ wait_event(bedata->inflight_req,
+ READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
+
+ ret = bedata->rsp[req_id].ret;
+ /* read ret, then set this rsp slot to be reused */
+ smp_mb();
+ WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID);
+ return ret;
+}
+
static const struct xenbus_device_id pvcalls_front_ids[] = {
{ "pvcalls" },
{ "" }
diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
index b7dabed..63b0417 100644
--- a/drivers/xen/pvcalls-front.h
+++ b/drivers/xen/pvcalls-front.h
@@ -4,5 +4,7 @@
#include <linux/net.h>
int pvcalls_front_socket(struct socket *sock);
+int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
+ int addr_len, int flags);
#endif