[PATCH] goldfish_pipe: An implementation of more parallel pipe

From: Greg KH
Date: Thu Apr 20 2017 - 10:05:32 EST



From: Jin Qian <jinqian@xxxxxxxxxxx>

This is a driver code for a redesigned android pipe.
Currently it works for x86 and x64 emulators with the following
performance results:
ADB push to /dev/null,
Ubuntu,
400 MB file,
times are for (1 / 10 / 100) parallel adb commands
x86 adb push: (4.4s / 11.5s / 2m10s) -> (2.8s / 6s / 51s)
x64 adb push: (7s / 15s / (too long, 6m+) -> (2.7s / 6.2s / 52s)

ADB pull and push to /data/ have the same %% of speedup
More importantly, I don't see any signs of slowdowns when
run in parallel with Antutu benchmark, so it is definitely
making much better job at multithreading.

Signed-off-by: Yurii Zubrytskyi <zyy@xxxxxxxxxx>
Signed-off-by: Jin Qian <jinqian@xxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>

---
drivers/platform/goldfish/goldfish_pipe.c | 976 +++++++++++++++++++-----------
1 file changed, 644 insertions(+), 332 deletions(-)

This is pretty much a total rewrite of the old goldfish_pipe driver from
Jin and Yurii. Given that no one uses this code outside of the goldfish
emulator, and breaking it up into tiny pieces doesn't make any sense.
I'm thinking I'll just take this patch as-is. No need for the old
driver to stick around, again, because no one really uses it.

Any objection to this?

thanks,

greg k-h


--- a/drivers/platform/goldfish/goldfish_pipe.c
+++ b/drivers/platform/goldfish/goldfish_pipe.c
@@ -1,8 +1,8 @@
/*
- * Copyright (C) 2011 Google, Inc.
* Copyright (C) 2012 Intel, Inc.
* Copyright (C) 2013 Intel, Inc.
* Copyright (C) 2014 Linaro Limited
+ * Copyright (C) 2011-2016 Google, Inc.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
@@ -46,6 +46,7 @@
* exchange is properly mapped during a transfer.
*/

+
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
@@ -63,122 +64,232 @@
#include <linux/acpi.h>

/*
+ * Update this when something changes in the driver's behavior so the host
+ * can benefit from knowing it
+ */
+enum {
+ PIPE_DRIVER_VERSION = 2,
+ PIPE_CURRENT_DEVICE_VERSION = 2
+};
+
+/*
* IMPORTANT: The following constants must match the ones used and defined
* in external/qemu/hw/goldfish_pipe.c in the Android source tree.
*/

-/* pipe device registers */
-#define PIPE_REG_COMMAND 0x00 /* write: value = command */
-#define PIPE_REG_STATUS 0x04 /* read */
-#define PIPE_REG_CHANNEL 0x08 /* read/write: channel id */
-#define PIPE_REG_CHANNEL_HIGH 0x30 /* read/write: channel id */
-#define PIPE_REG_SIZE 0x0c /* read/write: buffer size */
-#define PIPE_REG_ADDRESS 0x10 /* write: physical address */
-#define PIPE_REG_ADDRESS_HIGH 0x34 /* write: physical address */
-#define PIPE_REG_WAKES 0x14 /* read: wake flags */
-#define PIPE_REG_PARAMS_ADDR_LOW 0x18 /* read/write: batch data address */
-#define PIPE_REG_PARAMS_ADDR_HIGH 0x1c /* read/write: batch data address */
-#define PIPE_REG_ACCESS_PARAMS 0x20 /* write: batch access */
-#define PIPE_REG_VERSION 0x24 /* read: device version */
-
-/* list of commands for PIPE_REG_COMMAND */
-#define CMD_OPEN 1 /* open new channel */
-#define CMD_CLOSE 2 /* close channel (from guest) */
-#define CMD_POLL 3 /* poll read/write status */
-
/* List of bitflags returned in status of CMD_POLL command */
-#define PIPE_POLL_IN (1 << 0)
-#define PIPE_POLL_OUT (1 << 1)
-#define PIPE_POLL_HUP (1 << 2)
-
-/* The following commands are related to write operations */
-#define CMD_WRITE_BUFFER 4 /* send a user buffer to the emulator */
-#define CMD_WAKE_ON_WRITE 5 /* tell the emulator to wake us when writing
- is possible */
-#define CMD_READ_BUFFER 6 /* receive a user buffer from the emulator */
-#define CMD_WAKE_ON_READ 7 /* tell the emulator to wake us when reading
- * is possible */
+enum PipePollFlags {
+ PIPE_POLL_IN = 1 << 0,
+ PIPE_POLL_OUT = 1 << 1,
+ PIPE_POLL_HUP = 1 << 2
+};

/* Possible status values used to signal errors - see goldfish_pipe_error_convert */
-#define PIPE_ERROR_INVAL -1
-#define PIPE_ERROR_AGAIN -2
-#define PIPE_ERROR_NOMEM -3
-#define PIPE_ERROR_IO -4
+enum PipeErrors {
+ PIPE_ERROR_INVAL = -1,
+ PIPE_ERROR_AGAIN = -2,
+ PIPE_ERROR_NOMEM = -3,
+ PIPE_ERROR_IO = -4
+};

/* Bit-flags used to signal events from the emulator */
-#define PIPE_WAKE_CLOSED (1 << 0) /* emulator closed pipe */
-#define PIPE_WAKE_READ (1 << 1) /* pipe can now be read from */
-#define PIPE_WAKE_WRITE (1 << 2) /* pipe can now be written to */
-
-struct access_params {
- unsigned long channel;
- u32 size;
- unsigned long address;
- u32 cmd;
- u32 result;
- /* reserved for future extension */
+enum PipeWakeFlags {
+ PIPE_WAKE_CLOSED = 1 << 0, /* emulator closed pipe */
+ PIPE_WAKE_READ = 1 << 1, /* pipe can now be read from */
+ PIPE_WAKE_WRITE = 1 << 2 /* pipe can now be written to */
+};
+
+/* Bit flags for the 'flags' field */
+enum PipeFlagsBits {
+ BIT_CLOSED_ON_HOST = 0, /* pipe closed by host */
+ BIT_WAKE_ON_WRITE = 1, /* want to be woken on writes */
+ BIT_WAKE_ON_READ = 2, /* want to be woken on reads */
+};
+
+enum PipeRegs {
+ PIPE_REG_CMD = 0,
+
+ PIPE_REG_SIGNAL_BUFFER_HIGH = 4,
+ PIPE_REG_SIGNAL_BUFFER = 8,
+ PIPE_REG_SIGNAL_BUFFER_COUNT = 12,
+
+ PIPE_REG_OPEN_BUFFER_HIGH = 20,
+ PIPE_REG_OPEN_BUFFER = 24,
+
+ PIPE_REG_VERSION = 36,
+
+ PIPE_REG_GET_SIGNALLED = 48,
+};
+
+enum PipeCmdCode {
+ PIPE_CMD_OPEN = 1, /* to be used by the pipe device itself */
+ PIPE_CMD_CLOSE,
+ PIPE_CMD_POLL,
+ PIPE_CMD_WRITE,
+ PIPE_CMD_WAKE_ON_WRITE,
+ PIPE_CMD_READ,
+ PIPE_CMD_WAKE_ON_READ,
+
+ /*
+ * TODO(zyy): implement a deferred read/write execution to allow
+ * parallel processing of pipe operations on the host.
+ */
+ PIPE_CMD_WAKE_ON_DONE_IO,
+};
+
+enum {
+ MAX_BUFFERS_PER_COMMAND = 336,
+ MAX_SIGNALLED_PIPES = 64,
+ INITIAL_PIPES_CAPACITY = 64
+};
+
+struct goldfish_pipe_dev;
+struct goldfish_pipe;
+struct goldfish_pipe_command;
+
+/* A per-pipe command structure, shared with the host */
+struct goldfish_pipe_command {
+ s32 cmd; /* PipeCmdCode, guest -> host */
+ s32 id; /* pipe id, guest -> host */
+ s32 status; /* command execution status, host -> guest */
+ s32 reserved; /* to pad to 64-bit boundary */
+ union {
+ /* Parameters for PIPE_CMD_{READ,WRITE} */
+ struct {
+ /* number of buffers, guest -> host */
+ u32 buffers_count;
+ /* number of consumed bytes, host -> guest */
+ s32 consumed_size;
+ /* buffer pointers, guest -> host */
+ u64 ptrs[MAX_BUFFERS_PER_COMMAND];
+ /* buffer sizes, guest -> host */
+ u32 sizes[MAX_BUFFERS_PER_COMMAND];
+ } rw_params;
+ };
+};
+
+/* A single signalled pipe information */
+struct signalled_pipe_buffer {
+ u32 id;
u32 flags;
};

-/* The global driver data. Holds a reference to the i/o page used to
- * communicate with the emulator, and a wake queue for blocked tasks
- * waiting to be awoken.
- */
-struct goldfish_pipe_dev {
- spinlock_t lock;
- unsigned char __iomem *base;
- struct access_params *aps;
- int irq;
- u32 version;
+/* Parameters for the PIPE_CMD_OPEN command */
+struct open_command_param {
+ u64 command_buffer_ptr;
+ u32 rw_params_max_count;
};

-static struct goldfish_pipe_dev pipe_dev[1];
+/* Device-level set of buffers shared with the host */
+struct goldfish_pipe_dev_buffers {
+ struct open_command_param open_command_params;
+ struct signalled_pipe_buffer signalled_pipe_buffers[
+ MAX_SIGNALLED_PIPES];
+};

/* This data type models a given pipe instance */
struct goldfish_pipe {
- struct goldfish_pipe_dev *dev;
- struct mutex lock;
+ /* pipe ID - index into goldfish_pipe_dev::pipes array */
+ u32 id;
+ /* The wake flags pipe is waiting for
+ * Note: not protected with any lock, uses atomic operations
+ * and barriers to make it thread-safe.
+ */
unsigned long flags;
+ /* wake flags host have signalled,
+ * - protected by goldfish_pipe_dev::lock
+ */
+ unsigned long signalled_flags;
+
+ /* A pointer to command buffer */
+ struct goldfish_pipe_command *command_buffer;
+
+ /* doubly linked list of signalled pipes, protected by
+ * goldfish_pipe_dev::lock
+ */
+ struct goldfish_pipe *prev_signalled;
+ struct goldfish_pipe *next_signalled;
+
+ /*
+ * A pipe's own lock. Protects the following:
+ * - *command_buffer - makes sure a command can safely write its
+ * parameters to the host and read the results back.
+ */
+ struct mutex lock;
+
+ /* A wake queue for sleeping until host signals an event */
wait_queue_head_t wake_queue;
+ /* Pointer to the parent goldfish_pipe_dev instance */
+ struct goldfish_pipe_dev *dev;
};

+/* The global driver data. Holds a reference to the i/o page used to
+ * communicate with the emulator, and a wake queue for blocked tasks
+ * waiting to be awoken.
+ */
+struct goldfish_pipe_dev {
+ /*
+ * Global device spinlock. Protects the following members:
+ * - pipes, pipes_capacity
+ * - [*pipes, *pipes + pipes_capacity) - array data
+ * - first_signalled_pipe,
+ * goldfish_pipe::prev_signalled,
+ * goldfish_pipe::next_signalled,
+ * goldfish_pipe::signalled_flags - all singnalled-related fields,
+ * in all allocated pipes
+ * - open_command_params - PIPE_CMD_OPEN-related buffers
+ *
+ * It looks like a lot of different fields, but the trick is that
+ * the only operation that happens often is the signalled pipes array
+ * manipulation. That's why it's OK for now to keep the rest of the
+ * fields under the same lock. If we notice too much contention because
+ * of PIPE_CMD_OPEN, then we should add a separate lock there.
+ */
+ spinlock_t lock;

-/* Bit flags for the 'flags' field */
-enum {
- BIT_CLOSED_ON_HOST = 0, /* pipe closed by host */
- BIT_WAKE_ON_WRITE = 1, /* want to be woken on writes */
- BIT_WAKE_ON_READ = 2, /* want to be woken on reads */
+ /*
+ * Array of the pipes of |pipes_capacity| elements,
+ * indexed by goldfish_pipe::id
+ */
+ struct goldfish_pipe **pipes;
+ u32 pipes_capacity;
+
+ /* Pointers to the buffers host uses for interaction with this driver */
+ struct goldfish_pipe_dev_buffers *buffers;
+
+ /* Head of a doubly linked list of signalled pipes */
+ struct goldfish_pipe *first_signalled_pipe;
+
+ /* Some device-specific data */
+ int irq;
+ int version;
+ unsigned char __iomem *base;
};

+struct goldfish_pipe_dev pipe_dev[1] = {};

-static u32 goldfish_cmd_status(struct goldfish_pipe *pipe, u32 cmd)
+static int goldfish_cmd_locked(struct goldfish_pipe *pipe, enum PipeCmdCode cmd)
{
- unsigned long flags;
- u32 status;
- struct goldfish_pipe_dev *dev = pipe->dev;
-
- spin_lock_irqsave(&dev->lock, flags);
- gf_write_ptr(pipe, dev->base + PIPE_REG_CHANNEL,
- dev->base + PIPE_REG_CHANNEL_HIGH);
- writel(cmd, dev->base + PIPE_REG_COMMAND);
- status = readl(dev->base + PIPE_REG_STATUS);
- spin_unlock_irqrestore(&dev->lock, flags);
- return status;
+ pipe->command_buffer->cmd = cmd;
+ /* failure by default */
+ pipe->command_buffer->status = PIPE_ERROR_INVAL;
+ writel(pipe->id, pipe->dev->base + PIPE_REG_CMD);
+ return pipe->command_buffer->status;
}

-static void goldfish_cmd(struct goldfish_pipe *pipe, u32 cmd)
+static int goldfish_cmd(struct goldfish_pipe *pipe, enum PipeCmdCode cmd)
{
- unsigned long flags;
- struct goldfish_pipe_dev *dev = pipe->dev;
+ int status;

- spin_lock_irqsave(&dev->lock, flags);
- gf_write_ptr(pipe, dev->base + PIPE_REG_CHANNEL,
- dev->base + PIPE_REG_CHANNEL_HIGH);
- writel(cmd, dev->base + PIPE_REG_COMMAND);
- spin_unlock_irqrestore(&dev->lock, flags);
+ if (mutex_lock_interruptible(&pipe->lock))
+ return PIPE_ERROR_IO;
+ status = goldfish_cmd_locked(pipe, cmd);
+ mutex_unlock(&pipe->lock);
+ return status;
}

-/* This function converts an error code returned by the emulator through
+/*
+ * This function converts an error code returned by the emulator through
* the PIPE_REG_STATUS i/o register into a valid negative errno value.
*/
static int goldfish_pipe_error_convert(int status)
@@ -195,184 +306,202 @@ static int goldfish_pipe_error_convert(i
}
}

-/*
- * Notice: QEMU will return 0 for un-known register access, indicating
- * param_acess is supported or not
- */
-static int valid_batchbuffer_addr(struct goldfish_pipe_dev *dev,
- struct access_params *aps)
+static int pin_user_pages(unsigned long first_page, unsigned long last_page,
+ unsigned int last_page_size, int is_write,
+ struct page *pages[MAX_BUFFERS_PER_COMMAND],
+ unsigned int *iter_last_page_size)
+{
+ int ret;
+ int requested_pages = ((last_page - first_page) >> PAGE_SHIFT) + 1;
+
+ if (requested_pages > MAX_BUFFERS_PER_COMMAND) {
+ requested_pages = MAX_BUFFERS_PER_COMMAND;
+ *iter_last_page_size = PAGE_SIZE;
+ } else {
+ *iter_last_page_size = last_page_size;
+ }
+
+ ret = get_user_pages_fast(
+ first_page, requested_pages, !is_write, pages);
+ if (ret <= 0)
+ return -EFAULT;
+ if (ret < requested_pages)
+ *iter_last_page_size = PAGE_SIZE;
+ return ret;
+
+}
+
+static void release_user_pages(struct page **pages, int pages_count,
+ int is_write, s32 consumed_size)
{
- u32 aph, apl;
- u64 paddr;
- aph = readl(dev->base + PIPE_REG_PARAMS_ADDR_HIGH);
- apl = readl(dev->base + PIPE_REG_PARAMS_ADDR_LOW);
+ int i;

- paddr = ((u64)aph << 32) | apl;
- if (paddr != (__pa(aps)))
- return 0;
- return 1;
+ for (i = 0; i < pages_count; i++) {
+ if (!is_write && consumed_size > 0)
+ set_page_dirty(pages[i]);
+ put_page(pages[i]);
+ }
+}
+
+/* Populate the call parameters, merging adjacent pages together */
+static void populate_rw_params(
+ struct page **pages, int pages_count,
+ unsigned long address, unsigned long address_end,
+ unsigned long first_page, unsigned long last_page,
+ unsigned int iter_last_page_size, int is_write,
+ struct goldfish_pipe_command *command)
+{
+ /*
+ * Process the first page separately - it's the only page that
+ * needs special handling for its start address.
+ */
+ unsigned long xaddr = page_to_phys(pages[0]);
+ unsigned long xaddr_prev = xaddr;
+ int buffer_idx = 0;
+ int i = 1;
+ int size_on_page = first_page == last_page
+ ? (int)(address_end - address)
+ : (PAGE_SIZE - (address & ~PAGE_MASK));
+ command->rw_params.ptrs[0] = (u64)(xaddr | (address & ~PAGE_MASK));
+ command->rw_params.sizes[0] = size_on_page;
+ for (; i < pages_count; ++i) {
+ xaddr = page_to_phys(pages[i]);
+ size_on_page = (i == pages_count - 1) ?
+ iter_last_page_size : PAGE_SIZE;
+ if (xaddr == xaddr_prev + PAGE_SIZE) {
+ command->rw_params.sizes[buffer_idx] += size_on_page;
+ } else {
+ ++buffer_idx;
+ command->rw_params.ptrs[buffer_idx] = (u64)xaddr;
+ command->rw_params.sizes[buffer_idx] = size_on_page;
+ }
+ xaddr_prev = xaddr;
+ }
+ command->rw_params.buffers_count = buffer_idx + 1;
}

-/* 0 on success */
-static int setup_access_params_addr(struct platform_device *pdev,
- struct goldfish_pipe_dev *dev)
+static int transfer_max_buffers(struct goldfish_pipe *pipe,
+ unsigned long address, unsigned long address_end, int is_write,
+ unsigned long last_page, unsigned int last_page_size,
+ s32 *consumed_size, int *status)
{
- dma_addr_t dma_handle;
- struct access_params *aps;
+ struct page *pages[MAX_BUFFERS_PER_COMMAND];
+ unsigned long first_page = address & PAGE_MASK;
+ unsigned int iter_last_page_size;
+ int pages_count = pin_user_pages(first_page, last_page,
+ last_page_size, is_write,
+ pages, &iter_last_page_size);

- aps = dmam_alloc_coherent(&pdev->dev, sizeof(struct access_params),
- &dma_handle, GFP_KERNEL);
- if (!aps)
- return -ENOMEM;
+ if (pages_count < 0)
+ return pages_count;

- writel(upper_32_bits(dma_handle), dev->base + PIPE_REG_PARAMS_ADDR_HIGH);
- writel(lower_32_bits(dma_handle), dev->base + PIPE_REG_PARAMS_ADDR_LOW);
+ /* Serialize access to the pipe command buffers */
+ if (mutex_lock_interruptible(&pipe->lock))
+ return -ERESTARTSYS;

- if (valid_batchbuffer_addr(dev, aps)) {
- dev->aps = aps;
- return 0;
- } else
- return -1;
+ populate_rw_params(pages, pages_count, address, address_end,
+ first_page, last_page, iter_last_page_size, is_write,
+ pipe->command_buffer);
+
+ /* Transfer the data */
+ *status = goldfish_cmd_locked(pipe,
+ is_write ? PIPE_CMD_WRITE : PIPE_CMD_READ);
+
+ *consumed_size = pipe->command_buffer->rw_params.consumed_size;
+
+ mutex_unlock(&pipe->lock);
+
+ release_user_pages(pages, pages_count, is_write, *consumed_size);
+
+ return 0;
}

-/* A value that will not be set by qemu emulator */
-#define INITIAL_BATCH_RESULT (0xdeadbeaf)
-static int access_with_param(struct goldfish_pipe_dev *dev, const int cmd,
- unsigned long address, unsigned long avail,
- struct goldfish_pipe *pipe, int *status)
-{
- struct access_params *aps = dev->aps;
-
- if (aps == NULL)
- return -1;
-
- aps->result = INITIAL_BATCH_RESULT;
- aps->channel = (unsigned long)pipe;
- aps->size = avail;
- aps->address = address;
- aps->cmd = cmd;
- writel(cmd, dev->base + PIPE_REG_ACCESS_PARAMS);
- /*
- * If the aps->result has not changed, that means
- * that the batch command failed
- */
- if (aps->result == INITIAL_BATCH_RESULT)
- return -1;
- *status = aps->result;
+static int wait_for_host_signal(struct goldfish_pipe *pipe, int is_write)
+{
+ u32 wakeBit = is_write ? BIT_WAKE_ON_WRITE : BIT_WAKE_ON_READ;
+
+ set_bit(wakeBit, &pipe->flags);
+
+ /* Tell the emulator we're going to wait for a wake event */
+ (void)goldfish_cmd(pipe,
+ is_write ? PIPE_CMD_WAKE_ON_WRITE : PIPE_CMD_WAKE_ON_READ);
+
+ while (test_bit(wakeBit, &pipe->flags)) {
+ if (wait_event_interruptible(
+ pipe->wake_queue,
+ !test_bit(wakeBit, &pipe->flags)))
+ return -ERESTARTSYS;
+
+ if (test_bit(BIT_CLOSED_ON_HOST, &pipe->flags))
+ return -EIO;
+ }
+
return 0;
}

-static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer,
- size_t bufflen, int is_write)
+static ssize_t goldfish_pipe_read_write(struct file *filp,
+ char __user *buffer, size_t bufflen, int is_write)
{
- unsigned long irq_flags;
struct goldfish_pipe *pipe = filp->private_data;
- struct goldfish_pipe_dev *dev = pipe->dev;
- unsigned long address, address_end;
int count = 0, ret = -EINVAL;
+ unsigned long address, address_end, last_page;
+ unsigned int last_page_size;

/* If the emulator already closed the pipe, no need to go further */
- if (test_bit(BIT_CLOSED_ON_HOST, &pipe->flags))
+ if (unlikely(test_bit(BIT_CLOSED_ON_HOST, &pipe->flags)))
return -EIO;
-
/* Null reads or writes succeeds */
if (unlikely(bufflen == 0))
return 0;
-
/* Check the buffer range for access */
- if (!access_ok(is_write ? VERIFY_WRITE : VERIFY_READ,
- buffer, bufflen))
+ if (unlikely(!access_ok(is_write ? VERIFY_WRITE : VERIFY_READ,
+ buffer, bufflen)))
return -EFAULT;

- /* Serialize access to the pipe */
- if (mutex_lock_interruptible(&pipe->lock))
- return -ERESTARTSYS;
-
- address = (unsigned long)(void *)buffer;
+ address = (unsigned long)buffer;
address_end = address + bufflen;
+ last_page = (address_end - 1) & PAGE_MASK;
+ last_page_size = ((address_end - 1) & ~PAGE_MASK) + 1;

while (address < address_end) {
- unsigned long page_end = (address & PAGE_MASK) + PAGE_SIZE;
- unsigned long next = page_end < address_end ? page_end
- : address_end;
- unsigned long avail = next - address;
- int status, wakeBit;
- struct page *page;
-
- /* Either vaddr or paddr depending on the device version */
- unsigned long xaddr;
+ s32 consumed_size;
+ int status;

- /*
- * We grab the pages on a page-by-page basis in case user
- * space gives us a potentially huge buffer but the read only
- * returns a small amount, then there's no need to pin that
- * much memory to the process.
- */
- ret = get_user_pages_unlocked(address, 1, &page,
- is_write ? 0 : FOLL_WRITE);
+ ret = transfer_max_buffers(pipe, address, address_end, is_write,
+ last_page, last_page_size, &consumed_size,
+ &status);
if (ret < 0)
break;

- if (dev->version) {
- /* Device version 1 or newer (qemu-android) expects the
- * physical address.
+ if (consumed_size > 0) {
+ /* No matter what's the status, we've transferred
+ * something.
*/
- xaddr = page_to_phys(page) | (address & ~PAGE_MASK);
- } else {
- /* Device version 0 (classic emulator) expects the
- * virtual address.
- */
- xaddr = address;
+ count += consumed_size;
+ address += consumed_size;
}
-
- /* Now, try to transfer the bytes in the current page */
- spin_lock_irqsave(&dev->lock, irq_flags);
- if (access_with_param(dev,
- is_write ? CMD_WRITE_BUFFER : CMD_READ_BUFFER,
- xaddr, avail, pipe, &status)) {
- gf_write_ptr(pipe, dev->base + PIPE_REG_CHANNEL,
- dev->base + PIPE_REG_CHANNEL_HIGH);
- writel(avail, dev->base + PIPE_REG_SIZE);
- gf_write_ptr((void *)xaddr,
- dev->base + PIPE_REG_ADDRESS,
- dev->base + PIPE_REG_ADDRESS_HIGH);
- writel(is_write ? CMD_WRITE_BUFFER : CMD_READ_BUFFER,
- dev->base + PIPE_REG_COMMAND);
- status = readl(dev->base + PIPE_REG_STATUS);
- }
- spin_unlock_irqrestore(&dev->lock, irq_flags);
-
- if (status > 0 && !is_write)
- set_page_dirty(page);
- put_page(page);
-
- if (status > 0) { /* Correct transfer */
- count += status;
- address += status;
+ if (status > 0)
continue;
- } else if (status == 0) { /* EOF */
+ if (status == 0) {
+ /* EOF */
ret = 0;
break;
- } else if (status < 0 && count > 0) {
+ }
+ if (count > 0) {
/*
- * An error occurred and we already transferred
- * something on one of the previous pages.
+ * An error occurred, but we already transferred
+ * something on one of the previous iterations.
* Just return what we already copied and log this
* err.
- *
- * Note: This seems like an incorrect approach but
- * cannot change it until we check if any user space
- * ABI relies on this behavior.
*/
if (status != PIPE_ERROR_AGAIN)
- pr_info_ratelimited("goldfish_pipe: backend returned error %d on %s\n",
+ pr_info_ratelimited("goldfish_pipe: backend error %d on %s\n",
status, is_write ? "write" : "read");
- ret = 0;
break;
}

/*
- * If the error is not PIPE_ERROR_AGAIN, or if we are not in
+ * If the error is not PIPE_ERROR_AGAIN, or if we are in
* non-blocking mode, just return the error code.
*/
if (status != PIPE_ERROR_AGAIN ||
@@ -381,139 +510,214 @@ static ssize_t goldfish_pipe_read_write(
break;
}

- /*
- * The backend blocked the read/write, wait until the backend
- * tells us it's ready to process more data.
- */
- wakeBit = is_write ? BIT_WAKE_ON_WRITE : BIT_WAKE_ON_READ;
- set_bit(wakeBit, &pipe->flags);
-
- /* Tell the emulator we're going to wait for a wake event */
- goldfish_cmd(pipe,
- is_write ? CMD_WAKE_ON_WRITE : CMD_WAKE_ON_READ);
-
- /* Unlock the pipe, then wait for the wake signal */
- mutex_unlock(&pipe->lock);
-
- while (test_bit(wakeBit, &pipe->flags)) {
- if (wait_event_interruptible(
- pipe->wake_queue,
- !test_bit(wakeBit, &pipe->flags)))
- return -ERESTARTSYS;
-
- if (test_bit(BIT_CLOSED_ON_HOST, &pipe->flags))
- return -EIO;
- }
-
- /* Try to re-acquire the lock */
- if (mutex_lock_interruptible(&pipe->lock))
- return -ERESTARTSYS;
+ status = wait_for_host_signal(pipe, is_write);
+ if (status < 0)
+ return status;
}
- mutex_unlock(&pipe->lock);

- if (ret < 0)
- return ret;
- else
+ if (count > 0)
return count;
+ return ret;
}

static ssize_t goldfish_pipe_read(struct file *filp, char __user *buffer,
- size_t bufflen, loff_t *ppos)
+ size_t bufflen, loff_t *ppos)
{
- return goldfish_pipe_read_write(filp, buffer, bufflen, 0);
+ return goldfish_pipe_read_write(filp, buffer, bufflen,
+ /* is_write */ 0);
}

static ssize_t goldfish_pipe_write(struct file *filp,
const char __user *buffer, size_t bufflen,
loff_t *ppos)
{
- return goldfish_pipe_read_write(filp, (char __user *)buffer,
- bufflen, 1);
+ return goldfish_pipe_read_write(filp,
+ /* cast away the const */(char __user *)buffer, bufflen,
+ /* is_write */ 1);
}

-
static unsigned int goldfish_pipe_poll(struct file *filp, poll_table *wait)
{
struct goldfish_pipe *pipe = filp->private_data;
unsigned int mask = 0;
int status;

- mutex_lock(&pipe->lock);
-
poll_wait(filp, &pipe->wake_queue, wait);

- status = goldfish_cmd_status(pipe, CMD_POLL);
-
- mutex_unlock(&pipe->lock);
+ status = goldfish_cmd(pipe, PIPE_CMD_POLL);
+ if (status < 0)
+ return -ERESTARTSYS;

if (status & PIPE_POLL_IN)
mask |= POLLIN | POLLRDNORM;
-
if (status & PIPE_POLL_OUT)
mask |= POLLOUT | POLLWRNORM;
-
if (status & PIPE_POLL_HUP)
mask |= POLLHUP;
-
if (test_bit(BIT_CLOSED_ON_HOST, &pipe->flags))
mask |= POLLERR;

return mask;
}

-static irqreturn_t goldfish_pipe_interrupt(int irq, void *dev_id)
+static void signalled_pipes_add_locked(struct goldfish_pipe_dev *dev,
+ u32 id, u32 flags)
{
- struct goldfish_pipe_dev *dev = dev_id;
- unsigned long irq_flags;
- int count = 0;
+ struct goldfish_pipe *pipe;

- /*
- * We're going to read from the emulator a list of (channel,flags)
- * pairs corresponding to the wake events that occurred on each
- * blocked pipe (i.e. channel).
- */
- spin_lock_irqsave(&dev->lock, irq_flags);
- for (;;) {
- /* First read the channel, 0 means the end of the list */
- struct goldfish_pipe *pipe;
- unsigned long wakes;
- unsigned long channel = 0;
+ if (WARN_ON(id >= dev->pipes_capacity))
+ return;

-#ifdef CONFIG_64BIT
- channel = (u64)readl(dev->base + PIPE_REG_CHANNEL_HIGH) << 32;
+ pipe = dev->pipes[id];
+ if (!pipe)
+ return;
+ pipe->signalled_flags |= flags;
+
+ if (pipe->prev_signalled || pipe->next_signalled
+ || dev->first_signalled_pipe == pipe)
+ return; /* already in the list */
+ pipe->next_signalled = dev->first_signalled_pipe;
+ if (dev->first_signalled_pipe)
+ dev->first_signalled_pipe->prev_signalled = pipe;
+ dev->first_signalled_pipe = pipe;
+}
+
+static void signalled_pipes_remove_locked(struct goldfish_pipe_dev *dev,
+ struct goldfish_pipe *pipe) {
+ if (pipe->prev_signalled)
+ pipe->prev_signalled->next_signalled = pipe->next_signalled;
+ if (pipe->next_signalled)
+ pipe->next_signalled->prev_signalled = pipe->prev_signalled;
+ if (pipe == dev->first_signalled_pipe)
+ dev->first_signalled_pipe = pipe->next_signalled;
+ pipe->prev_signalled = NULL;
+ pipe->next_signalled = NULL;
+}

- if (channel == 0)
- break;
-#endif
- channel |= readl(dev->base + PIPE_REG_CHANNEL);
+static struct goldfish_pipe *signalled_pipes_pop_front(
+ struct goldfish_pipe_dev *dev, int *wakes)
+{
+ struct goldfish_pipe *pipe;
+ unsigned long flags;

- if (channel == 0)
- break;
+ spin_lock_irqsave(&dev->lock, flags);

- /* Convert channel to struct pipe pointer + read wake flags */
- wakes = readl(dev->base + PIPE_REG_WAKES);
- pipe = (struct goldfish_pipe *)(ptrdiff_t)channel;
+ pipe = dev->first_signalled_pipe;
+ if (pipe) {
+ *wakes = pipe->signalled_flags;
+ pipe->signalled_flags = 0;
+ /*
+ * This is an optimized version of
+ * signalled_pipes_remove_locked()
+ * - We want to make it as fast as possible to
+ * wake the sleeping pipe operations faster.
+ */
+ dev->first_signalled_pipe = pipe->next_signalled;
+ if (dev->first_signalled_pipe)
+ dev->first_signalled_pipe->prev_signalled = NULL;
+ pipe->next_signalled = NULL;
+ }

- /* Did the emulator just closed a pipe? */
+ spin_unlock_irqrestore(&dev->lock, flags);
+ return pipe;
+}
+
+static void goldfish_interrupt_task(unsigned long unused)
+{
+ struct goldfish_pipe_dev *dev = pipe_dev;
+ /* Iterate over the signalled pipes and wake them one by one */
+ struct goldfish_pipe *pipe;
+ int wakes;
+
+ while ((pipe = signalled_pipes_pop_front(dev, &wakes)) != NULL) {
if (wakes & PIPE_WAKE_CLOSED) {
- set_bit(BIT_CLOSED_ON_HOST, &pipe->flags);
- wakes |= PIPE_WAKE_READ | PIPE_WAKE_WRITE;
+ pipe->flags = 1 << BIT_CLOSED_ON_HOST;
+ } else {
+ if (wakes & PIPE_WAKE_READ)
+ clear_bit(BIT_WAKE_ON_READ, &pipe->flags);
+ if (wakes & PIPE_WAKE_WRITE)
+ clear_bit(BIT_WAKE_ON_WRITE, &pipe->flags);
}
- if (wakes & PIPE_WAKE_READ)
- clear_bit(BIT_WAKE_ON_READ, &pipe->flags);
- if (wakes & PIPE_WAKE_WRITE)
- clear_bit(BIT_WAKE_ON_WRITE, &pipe->flags);
-
+ /*
+ * wake_up_interruptible() implies a write barrier, so don't
+ * explicitly add another one here.
+ */
wake_up_interruptible(&pipe->wake_queue);
- count++;
}
- spin_unlock_irqrestore(&dev->lock, irq_flags);
+}
+DECLARE_TASKLET(goldfish_interrupt_tasklet, goldfish_interrupt_task, 0);
+
+/*
+ * The general idea of the interrupt handling:
+ *
+ * 1. device raises an interrupt if there's at least one signalled pipe
+ * 2. IRQ handler reads the signalled pipes and their count from the device
+ * 3. device writes them into a shared buffer and returns the count
+ * it only resets the IRQ if it has returned all signalled pipes,
+ * otherwise it leaves it raised, so IRQ handler will be called
+ * again for the next chunk
+ * 4. IRQ handler adds all returned pipes to the device's signalled pipes list
+ * 5. IRQ handler launches a tasklet to process the signalled pipes from the
+ * list in a separate context
+ */
+static irqreturn_t goldfish_pipe_interrupt(int irq, void *dev_id)
+{
+ u32 count;
+ u32 i;
+ unsigned long flags;
+ struct goldfish_pipe_dev *dev = dev_id;
+
+ if (dev != pipe_dev)
+ return IRQ_NONE;
+
+ /* Request the signalled pipes from the device */
+ spin_lock_irqsave(&dev->lock, flags);
+
+ count = readl(dev->base + PIPE_REG_GET_SIGNALLED);
+ if (count == 0) {
+ spin_unlock_irqrestore(&dev->lock, flags);
+ return IRQ_NONE;
+ }
+ if (count > MAX_SIGNALLED_PIPES)
+ count = MAX_SIGNALLED_PIPES;

- return (count == 0) ? IRQ_NONE : IRQ_HANDLED;
+ for (i = 0; i < count; ++i)
+ signalled_pipes_add_locked(dev,
+ dev->buffers->signalled_pipe_buffers[i].id,
+ dev->buffers->signalled_pipe_buffers[i].flags);
+
+ spin_unlock_irqrestore(&dev->lock, flags);
+
+ tasklet_schedule(&goldfish_interrupt_tasklet);
+ return IRQ_HANDLED;
+}
+
+static int get_free_pipe_id_locked(struct goldfish_pipe_dev *dev)
+{
+ int id;
+
+ for (id = 0; id < dev->pipes_capacity; ++id)
+ if (!dev->pipes[id])
+ return id;
+
+ {
+ /* Reallocate the array */
+ u32 new_capacity = 2 * dev->pipes_capacity;
+ struct goldfish_pipe **pipes =
+ kcalloc(new_capacity, sizeof(*pipes), GFP_KERNEL);
+ if (!pipes)
+ return -ENOMEM;
+ memcpy(pipes, dev->pipes, sizeof(*pipes) * dev->pipes_capacity);
+ kfree(dev->pipes);
+ dev->pipes = pipes;
+ id = dev->pipes_capacity;
+ dev->pipes_capacity = new_capacity;
+ }
+ return id;
}

/**
- * goldfish_pipe_open - open a channel to the AVD
+ * goldfish_pipe_open - open a channel to the AVD
* @inode: inode of device
* @file: file struct of opener
*
@@ -525,12 +729,13 @@ static irqreturn_t goldfish_pipe_interru
*/
static int goldfish_pipe_open(struct inode *inode, struct file *file)
{
- struct goldfish_pipe *pipe;
struct goldfish_pipe_dev *dev = pipe_dev;
- int32_t status;
+ unsigned long flags;
+ int id;
+ int status;

/* Allocate new pipe kernel object */
- pipe = kzalloc(sizeof(*pipe), GFP_KERNEL);
+ struct goldfish_pipe *pipe = kzalloc(sizeof(*pipe), GFP_KERNEL);
if (pipe == NULL)
return -ENOMEM;

@@ -539,29 +744,69 @@ static int goldfish_pipe_open(struct ino
init_waitqueue_head(&pipe->wake_queue);

/*
- * Now, tell the emulator we're opening a new pipe. We use the
- * pipe object's address as the channel identifier for simplicity.
+ * Command buffer needs to be allocated on its own page to make sure
+ * it is physically contiguous in host's address space.
*/
+ pipe->command_buffer =
+ (struct goldfish_pipe_command *)__get_free_page(GFP_KERNEL);
+ if (!pipe->command_buffer) {
+ status = -ENOMEM;
+ goto err_pipe;
+ }

- status = goldfish_cmd_status(pipe, CMD_OPEN);
- if (status < 0) {
- kfree(pipe);
- return status;
+ spin_lock_irqsave(&dev->lock, flags);
+
+ id = get_free_pipe_id_locked(dev);
+ if (id < 0) {
+ status = id;
+ goto err_id_locked;
}

+ dev->pipes[id] = pipe;
+ pipe->id = id;
+ pipe->command_buffer->id = id;
+
+ /* Now tell the emulator we're opening a new pipe. */
+ dev->buffers->open_command_params.rw_params_max_count =
+ MAX_BUFFERS_PER_COMMAND;
+ dev->buffers->open_command_params.command_buffer_ptr =
+ (u64)(unsigned long)__pa(pipe->command_buffer);
+ status = goldfish_cmd_locked(pipe, PIPE_CMD_OPEN);
+ spin_unlock_irqrestore(&dev->lock, flags);
+ if (status < 0)
+ goto err_cmd;
/* All is done, save the pipe into the file's private data field */
file->private_data = pipe;
return 0;
+
+err_cmd:
+ spin_lock_irqsave(&dev->lock, flags);
+ dev->pipes[id] = NULL;
+err_id_locked:
+ spin_unlock_irqrestore(&dev->lock, flags);
+ free_page((unsigned long)pipe->command_buffer);
+err_pipe:
+ kfree(pipe);
+ return status;
}

static int goldfish_pipe_release(struct inode *inode, struct file *filp)
{
+ unsigned long flags;
struct goldfish_pipe *pipe = filp->private_data;
+ struct goldfish_pipe_dev *dev = pipe->dev;

/* The guest is closing the channel, so tell the emulator right now */
- goldfish_cmd(pipe, CMD_CLOSE);
- kfree(pipe);
+ (void)goldfish_cmd(pipe, PIPE_CMD_CLOSE);
+
+ spin_lock_irqsave(&dev->lock, flags);
+ dev->pipes[pipe->id] = NULL;
+ signalled_pipes_remove_locked(dev, pipe);
+ spin_unlock_irqrestore(&dev->lock, flags);
+
filp->private_data = NULL;
+ free_page((unsigned long)pipe->command_buffer);
+ kfree(pipe);
return 0;
}

@@ -574,18 +819,91 @@ static const struct file_operations gold
.release = goldfish_pipe_release,
};

-static struct miscdevice goldfish_pipe_device = {
+static struct miscdevice goldfish_pipe_dev = {
.minor = MISC_DYNAMIC_MINOR,
.name = "goldfish_pipe",
.fops = &goldfish_pipe_fops,
};

+static int goldfish_pipe_device_init(struct platform_device *pdev)
+{
+ char *page;
+ struct goldfish_pipe_dev *dev = pipe_dev;
+ int err = devm_request_irq(&pdev->dev, dev->irq,
+ goldfish_pipe_interrupt,
+ IRQF_SHARED, "goldfish_pipe", dev);
+ if (err) {
+ dev_err(&pdev->dev, "unable to allocate IRQ for v2\n");
+ return err;
+ }
+
+ err = misc_register(&goldfish_pipe_dev);
+ if (err) {
+ dev_err(&pdev->dev, "unable to register v2 device\n");
+ return err;
+ }
+
+ dev->first_signalled_pipe = NULL;
+ dev->pipes_capacity = INITIAL_PIPES_CAPACITY;
+ dev->pipes = kcalloc(dev->pipes_capacity, sizeof(*dev->pipes),
+ GFP_KERNEL);
+ if (!dev->pipes)
+ return -ENOMEM;
+
+ /*
+ * We're going to pass two buffers, open_command_params and
+ * signalled_pipe_buffers, to the host. This means each of those buffers
+ * needs to be contained in a single physical page. The easiest choice
+ * is to just allocate a page and place the buffers in it.
+ */
+ if (WARN_ON(sizeof(*dev->buffers) > PAGE_SIZE))
+ return -ENOMEM;
+
+ page = (char *)__get_free_page(GFP_KERNEL);
+ if (!page) {
+ kfree(dev->pipes);
+ return -ENOMEM;
+ }
+ dev->buffers = (struct goldfish_pipe_dev_buffers *)page;
+
+ /* Send the buffer addresses to the host */
+ {
+ u64 paddr = __pa(&dev->buffers->signalled_pipe_buffers);
+
+ writel((u32)(unsigned long)(paddr >> 32),
+ dev->base + PIPE_REG_SIGNAL_BUFFER_HIGH);
+ writel((u32)(unsigned long)paddr,
+ dev->base + PIPE_REG_SIGNAL_BUFFER);
+ writel((u32)MAX_SIGNALLED_PIPES,
+ dev->base + PIPE_REG_SIGNAL_BUFFER_COUNT);
+
+ paddr = __pa(&dev->buffers->open_command_params);
+ writel((u32)(unsigned long)(paddr >> 32),
+ dev->base + PIPE_REG_OPEN_BUFFER_HIGH);
+ writel((u32)(unsigned long)paddr,
+ dev->base + PIPE_REG_OPEN_BUFFER);
+ }
+ return 0;
+}
+
+static void goldfish_pipe_device_deinit(struct platform_device *pdev)
+{
+ struct goldfish_pipe_dev *dev = pipe_dev;
+
+ misc_deregister(&goldfish_pipe_dev);
+ kfree(dev->pipes);
+ free_page((unsigned long)dev->buffers);
+}
+
static int goldfish_pipe_probe(struct platform_device *pdev)
{
int err;
struct resource *r;
struct goldfish_pipe_dev *dev = pipe_dev;

+ if (WARN_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE))
+ return -ENOMEM;
+
/* not thread safe, but this should not happen */
WARN_ON(dev->base != NULL);

@@ -609,26 +927,21 @@ static int goldfish_pipe_probe(struct pl
}
dev->irq = r->start;

- err = devm_request_irq(&pdev->dev, dev->irq, goldfish_pipe_interrupt,
- IRQF_SHARED, "goldfish_pipe", dev);
- if (err) {
- dev_err(&pdev->dev, "unable to allocate IRQ\n");
- goto error;
- }
-
- err = misc_register(&goldfish_pipe_device);
- if (err) {
- dev_err(&pdev->dev, "unable to register device\n");
- goto error;
- }
- setup_access_params_addr(pdev, dev);
-
- /* Although the pipe device in the classic Android emulator does not
- * recognize the 'version' register, it won't treat this as an error
- * either and will simply return 0, which is fine.
+ /*
+ * Exchange the versions with the host device
+ *
+ * Note: v1 driver used to not report its version, so we write it before
+ * reading device version back: this allows the host implementation to
+ * detect the old driver (if there was no version write before read).
*/
+ writel((u32)PIPE_DRIVER_VERSION, dev->base + PIPE_REG_VERSION);
dev->version = readl(dev->base + PIPE_REG_VERSION);
- return 0;
+ if (WARN_ON(dev->version < PIPE_CURRENT_DEVICE_VERSION))
+ return -EINVAL;
+
+ err = goldfish_pipe_device_init(pdev);
+ if (!err)
+ return 0;

error:
dev->base = NULL;
@@ -638,7 +951,7 @@ error:
static int goldfish_pipe_remove(struct platform_device *pdev)
{
struct goldfish_pipe_dev *dev = pipe_dev;
- misc_deregister(&goldfish_pipe_device);
+ goldfish_pipe_device_deinit(pdev);
dev->base = NULL;
return 0;
}
@@ -655,17 +968,16 @@ static const struct of_device_id goldfis
};
MODULE_DEVICE_TABLE(of, goldfish_pipe_of_match);

-static struct platform_driver goldfish_pipe = {
+static struct platform_driver goldfish_pipe_driver = {
.probe = goldfish_pipe_probe,
.remove = goldfish_pipe_remove,
.driver = {
.name = "goldfish_pipe",
- .owner = THIS_MODULE,
.of_match_table = goldfish_pipe_of_match,
.acpi_match_table = ACPI_PTR(goldfish_pipe_acpi_match),
}
};

-module_platform_driver(goldfish_pipe);
+module_platform_driver(goldfish_pipe_driver);
MODULE_AUTHOR("David Turner <digit@xxxxxxxxxx>");
MODULE_LICENSE("GPL");