[PATCH 4/5] android: binder: Add global lru shrinker to binder

From: Sherry Yang
Date: Tue Aug 15 2017 - 20:26:27 EST


Hold on to the pages allocated and mapped for transaction
buffers until the system is under memory pressure. When
that happens, use linux shrinker to free pages. Without
using shrinker, patch "android: binder: Move buffer out
of area shared with user space" will cause a significant
slow down for small transactions that fit into the first
page because free list buffer header used to be inlined
with buffer data.

In addition to prevent the performance regression for
small transactions, this patch improves the performance
for transactions that take up more than one page.

Modify alloc selftest to work with the shrinker change.

Test: Run memory intensive applications (Chrome and Camera)
to trigger shrinker callbacks. Binder frees memory as expected.
Test: Run binderThroughputTest with high memory pressure
option enabled.

Change-Id: I83409cc8b7d99684c99bc383880f3dfedaedca83
Signed-off-by: Sherry Yang <sherryy@xxxxxxxxxxx>
---
drivers/android/binder.c | 2 +
drivers/android/binder_alloc.c | 172 +++++++++++++++++++++++++++-----
drivers/android/binder_alloc.h | 23 ++++-
drivers/android/binder_alloc_selftest.c | 68 ++++++++++---
4 files changed, 225 insertions(+), 40 deletions(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index b31e64c6f666..fc5a4b9f3d97 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -5243,6 +5243,8 @@ static int __init binder_init(void)
struct binder_device *device;
struct hlist_node *tmp;

+ binder_alloc_shrinker_init();
+
atomic_set(&binder_transaction_log.cur, ~0U);
atomic_set(&binder_transaction_log_failed.cur, ~0U);

diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index e96659215f25..11a08bf72bcc 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -27,9 +27,12 @@
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/sched.h>
+#include <linux/list_lru.h>
#include "binder_alloc.h"
#include "binder_trace.h"

+struct list_lru binder_alloc_lru;
+
static DEFINE_MUTEX(binder_alloc_mmap_lock);

enum {
@@ -188,8 +191,9 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate,
{
void *page_addr;
unsigned long user_page_addr;
- struct page **page;
- struct mm_struct *mm;
+ struct binder_lru_page *page;
+ struct mm_struct *mm = NULL;
+ bool need_mm = false;

binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
"%d: %s pages %pK-%pK\n", alloc->pid,
@@ -200,9 +204,18 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate,

trace_binder_update_page_range(alloc, allocate, start, end);

- if (vma)
- mm = NULL;
- else
+ if (allocate == 0)
+ goto free_range;
+
+ for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) {
+ page = &alloc->pages[(page_addr - alloc->buffer) / PAGE_SIZE];
+ if (!page->page_ptr) {
+ need_mm = true;
+ break;
+ }
+ }
+
+ if (!vma && need_mm)
mm = get_task_mm(alloc->tsk);

if (mm) {
@@ -215,10 +228,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate,
}
}

- if (allocate == 0)
- goto free_range;
-
- if (vma == NULL) {
+ if (!vma && need_mm) {
pr_err("%d: binder_alloc_buf failed to map pages in userspace, no vma\n",
alloc->pid);
goto err_no_vma;
@@ -226,18 +236,33 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate,

for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) {
int ret;
+ bool on_lru;

page = &alloc->pages[(page_addr - alloc->buffer) / PAGE_SIZE];

- BUG_ON(*page);
- *page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
- if (*page == NULL) {
+ if (page->page_ptr) {
+ on_lru = list_lru_del(&binder_alloc_lru, &page->lru);
+ WARN_ON(!on_lru);
+ continue;
+ }
+
+ if (WARN_ON(!vma))
+ goto err_page_ptr_cleared;
+
+ page->page_ptr = alloc_page(GFP_KERNEL |
+ __GFP_HIGHMEM |
+ __GFP_ZERO);
+ if (!page->page_ptr) {
pr_err("%d: binder_alloc_buf failed for page at %pK\n",
alloc->pid, page_addr);
goto err_alloc_page_failed;
}
+ page->alloc = alloc;
+ INIT_LIST_HEAD(&page->lru);
+
ret = map_kernel_range_noflush((unsigned long)page_addr,
- PAGE_SIZE, PAGE_KERNEL, page);
+ PAGE_SIZE, PAGE_KERNEL,
+ &page->page_ptr);
flush_cache_vmap((unsigned long)page_addr,
(unsigned long)page_addr + PAGE_SIZE);
if (ret != 1) {
@@ -247,7 +272,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate,
}
user_page_addr =
(uintptr_t)page_addr + alloc->user_buffer_offset;
- ret = vm_insert_page(vma, user_page_addr, page[0]);
+ ret = vm_insert_page(vma, user_page_addr, page[0].page_ptr);
if (ret) {
pr_err("%d: binder_alloc_buf failed to map page at %lx in userspace\n",
alloc->pid, user_page_addr);
@@ -264,16 +289,21 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate,
free_range:
for (page_addr = end - PAGE_SIZE; page_addr >= start;
page_addr -= PAGE_SIZE) {
+ bool ret;
+
page = &alloc->pages[(page_addr - alloc->buffer) / PAGE_SIZE];
- if (vma)
- zap_page_range(vma, (uintptr_t)page_addr +
- alloc->user_buffer_offset, PAGE_SIZE);
+
+ ret = list_lru_add(&binder_alloc_lru, &page->lru);
+ WARN_ON(!ret);
+ continue;
+
err_vm_insert_page_failed:
unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE);
err_map_kernel_failed:
- __free_page(*page);
- *page = NULL;
+ __free_page(page->page_ptr);
+ page->page_ptr = NULL;
err_alloc_page_failed:
+err_page_ptr_cleared:
;
}
err_no_vma:
@@ -731,16 +761,20 @@ void binder_alloc_deferred_release(struct binder_alloc *alloc)

for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) {
void *page_addr;
+ bool on_lru;

- if (!alloc->pages[i])
+ if (!alloc->pages[i].page_ptr)
continue;

+ on_lru = list_lru_del(&binder_alloc_lru,
+ &alloc->pages[i].lru);
page_addr = alloc->buffer + i * PAGE_SIZE;
binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
- "%s: %d: page %d at %pK not freed\n",
- __func__, alloc->pid, i, page_addr);
+ "%s: %d: page %d at %pK %s\n",
+ __func__, alloc->pid, i, page_addr,
+ on_lru ? "on lru" : "active");
unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE);
- __free_page(alloc->pages[i]);
+ __free_page(alloc->pages[i].page_ptr);
page_count++;
}
kfree(alloc->pages);
@@ -816,6 +850,93 @@ void binder_alloc_vma_close(struct binder_alloc *alloc)
WRITE_ONCE(alloc->vma_vm_mm, NULL);
}

+/**
+ * binder_alloc_free_page() - shrinker callback to free pages
+ * @item: item to free
+ * @lock: lock protecting the item
+ * @cb_arg: callback argument
+ *
+ * Called from list_lru_walk() in binder_shrink_scan() to free
+ * up pages when the system is under memory pressure.
+ */
+enum lru_status binder_alloc_free_page(struct list_head *item,
+ struct list_lru_one *lru,
+ spinlock_t *lock,
+ void *cb_arg)
+{
+ struct mm_struct *mm = NULL;
+ struct binder_lru_page *page = container_of(item,
+ struct binder_lru_page,
+ lru);
+ struct binder_alloc *alloc;
+ uintptr_t page_addr;
+ size_t index;
+
+ alloc = page->alloc;
+ if (!mutex_trylock(&alloc->mutex))
+ goto err_get_alloc_mutex_failed;
+
+ if (!page->page_ptr)
+ goto err_page_already_freed;
+
+ index = page - alloc->pages;
+ page_addr = (uintptr_t)alloc->buffer + index * PAGE_SIZE;
+ if (alloc->vma) {
+ mm = get_task_mm(alloc->tsk);
+ if (!mm)
+ goto err_get_task_mm_failed;
+ if (!down_write_trylock(&mm->mmap_sem))
+ goto err_down_write_mmap_sem_failed;
+
+ zap_page_range(alloc->vma,
+ page_addr + alloc->user_buffer_offset,
+ PAGE_SIZE);
+
+ up_write(&mm->mmap_sem);
+ mmput(mm);
+ }
+
+ unmap_kernel_range(page_addr, PAGE_SIZE);
+ __free_page(page->page_ptr);
+ page->page_ptr = NULL;
+
+ list_lru_isolate(lru, item);
+
+ mutex_unlock(&alloc->mutex);
+ return LRU_REMOVED;
+
+err_down_write_mmap_sem_failed:
+ mmput(mm);
+err_get_task_mm_failed:
+err_page_already_freed:
+ mutex_unlock(&alloc->mutex);
+err_get_alloc_mutex_failed:
+ return LRU_SKIP;
+}
+
+static unsigned long
+binder_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+ unsigned long ret = list_lru_count(&binder_alloc_lru);
+ return ret;
+}
+
+static unsigned long
+binder_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+ unsigned long ret;
+
+ ret = list_lru_walk(&binder_alloc_lru, binder_alloc_free_page,
+ NULL, sc->nr_to_scan);
+ return ret;
+}
+
+struct shrinker binder_shrinker = {
+ .count_objects = binder_shrink_count,
+ .scan_objects = binder_shrink_scan,
+ .seeks = DEFAULT_SEEKS,
+};
+
/**
* binder_alloc_init() - called by binder_open() for per-proc initialization
* @alloc: binder_alloc for this proc
@@ -830,3 +951,8 @@ void binder_alloc_init(struct binder_alloc *alloc)
mutex_init(&alloc->mutex);
}

+void binder_alloc_shrinker_init(void)
+{
+ list_lru_init(&binder_alloc_lru);
+ register_shrinker(&binder_shrinker);
+}
diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h
index dd5649bf6469..fa707cc63393 100644
--- a/drivers/android/binder_alloc.h
+++ b/drivers/android/binder_alloc.h
@@ -21,7 +21,9 @@
#include <linux/rtmutex.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
+#include <linux/list_lru.h>

+extern struct list_lru binder_alloc_lru;
struct binder_transaction;

/**
@@ -60,6 +62,18 @@ struct binder_buffer {
void *data;
};

+/**
+ * struct binder_lru_page - page object used for binder shrinker
+ * @page_ptr: pointer to physical page in mmap'd space
+ * @lru: entry in binder_alloc_lru
+ * @alloc: binder_alloc for a proc
+ */
+struct binder_lru_page {
+ struct list_head lru;
+ struct page *page_ptr;
+ struct binder_alloc *alloc;
+};
+
/**
* struct binder_alloc - per-binder proc state for binder allocator
* @vma: vm_area_struct passed to mmap_handler
@@ -75,8 +89,7 @@ struct binder_buffer {
* @allocated_buffers: rb tree of allocated buffers sorted by address
* @free_async_space: VA space available for async buffers. This is
* initialized at mmap time to 1/2 the full VA space
- * @pages: array of physical page addresses for each
- * page of mmap'd space
+ * @pages: array of binder_lru_page
* @buffer_size: size of address space specified via mmap
* @pid: pid for associated binder_proc (invariant after init)
*
@@ -96,7 +109,7 @@ struct binder_alloc {
struct rb_root free_buffers;
struct rb_root allocated_buffers;
size_t free_async_space;
- struct page **pages;
+ struct binder_lru_page *pages;
size_t buffer_size;
uint32_t buffer_free;
int pid;
@@ -107,12 +120,16 @@ void binder_selftest_alloc(struct binder_alloc *alloc);
#else
static inline void binder_selftest_alloc(struct binder_alloc *alloc) {}
#endif
+enum lru_status binder_alloc_free_page(struct list_head *item,
+ struct list_lru_one *lru,
+ spinlock_t *lock, void *cb_arg);
extern struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc,
size_t data_size,
size_t offsets_size,
size_t extra_buffers_size,
int is_async);
extern void binder_alloc_init(struct binder_alloc *alloc);
+void binder_alloc_shrinker_init(void);
extern void binder_alloc_vma_close(struct binder_alloc *alloc);
extern struct binder_buffer *
binder_alloc_prepare_to_free(struct binder_alloc *alloc,
diff --git a/drivers/android/binder_alloc_selftest.c b/drivers/android/binder_alloc_selftest.c
index 0bf72079a9da..8bd7bcef967d 100644
--- a/drivers/android/binder_alloc_selftest.c
+++ b/drivers/android/binder_alloc_selftest.c
@@ -109,9 +109,11 @@ static bool check_buffer_pages_allocated(struct binder_alloc *alloc,
page_addr = buffer->data;
for (; page_addr < end; page_addr += PAGE_SIZE) {
page_index = (page_addr - alloc->buffer) / PAGE_SIZE;
- if (!alloc->pages[page_index]) {
- pr_err("incorrect alloc state at page index %d\n",
- page_index);
+ if (!alloc->pages[page_index].page_ptr ||
+ !list_empty(&alloc->pages[page_index].lru)) {
+ pr_err("expect alloc but is %s at page index %d\n",
+ alloc->pages[page_index].page_ptr ?
+ "lru" : "free", page_index);
return false;
}
}
@@ -137,28 +139,63 @@ static void binder_selftest_alloc_buf(struct binder_alloc *alloc,

static void binder_selftest_free_buf(struct binder_alloc *alloc,
struct binder_buffer *buffers[],
- size_t *sizes, int *seq)
+ size_t *sizes, int *seq, size_t end)
{
int i;

for (i = 0; i < BUFFER_NUM; i++)
binder_alloc_free_buf(alloc, buffers[seq[i]]);

+ for (i = 0; i < end / PAGE_SIZE; i++) {
+ /**
+ * Error message on a free page can be false positive
+ * if binder shrinker ran during binder_alloc_free_buf
+ * calls above.
+ */
+ if (list_empty(&alloc->pages[i].lru)) {
+ pr_err_size_seq(sizes, seq);
+ pr_err("expect lru but is %s at page index %d\n",
+ alloc->pages[i].page_ptr ? "alloc" : "free", i);
+ binder_selftest_failures++;
+ }
+ }
+}
+
+static void binder_selftest_free_page(struct binder_alloc *alloc)
+{
+ int i;
+ unsigned long count;
+
+ while ((count = list_lru_count(&binder_alloc_lru))) {
+ list_lru_walk(&binder_alloc_lru, binder_alloc_free_page,
+ NULL, count);
+ }
+
for (i = 0; i < (alloc->buffer_size / PAGE_SIZE); i++) {
- if ((!alloc->pages[i]) == (i == 0)) {
- pr_err("incorrect free state at page index %d\n", i);
+ if (alloc->pages[i].page_ptr) {
+ pr_err("expect free but is %s at page index %d\n",
+ list_empty(&alloc->pages[i].lru) ?
+ "alloc" : "lru", i);
binder_selftest_failures++;
}
}
}

static void binder_selftest_alloc_free(struct binder_alloc *alloc,
- size_t *sizes, int *seq)
+ size_t *sizes, int *seq, size_t end)
{
struct binder_buffer *buffers[BUFFER_NUM];

binder_selftest_alloc_buf(alloc, buffers, sizes, seq);
- binder_selftest_free_buf(alloc, buffers, sizes, seq);
+ binder_selftest_free_buf(alloc, buffers, sizes, seq, end);
+
+ /* Allocate from lru. */
+ binder_selftest_alloc_buf(alloc, buffers, sizes, seq);
+ if (list_lru_count(&binder_alloc_lru))
+ pr_err("lru list should be empty but is not\n");
+
+ binder_selftest_free_buf(alloc, buffers, sizes, seq, end);
+ binder_selftest_free_page(alloc);
}

static bool is_dup(int *seq, int index, int val)
@@ -174,19 +211,20 @@ static bool is_dup(int *seq, int index, int val)

/* Generate BUFFER_NUM factorial free orders. */
static void binder_selftest_free_seq(struct binder_alloc *alloc,
- size_t *sizes, int *seq, int index)
+ size_t *sizes, int *seq,
+ int index, size_t end)
{
int i;

if (index == BUFFER_NUM) {
- binder_selftest_alloc_free(alloc, sizes, seq);
+ binder_selftest_alloc_free(alloc, sizes, seq, end);
return;
}
for (i = 0; i < BUFFER_NUM; i++) {
if (is_dup(seq, index, i))
continue;
seq[index] = i;
- binder_selftest_free_seq(alloc, sizes, seq, index + 1);
+ binder_selftest_free_seq(alloc, sizes, seq, index + 1, end);
}
}

@@ -211,8 +249,9 @@ static void binder_selftest_alloc_size(struct binder_alloc *alloc,
* we need one giant buffer before getting to the last page.
*/
back_sizes[0] += alloc->buffer_size - end_offset[BUFFER_NUM - 1];
- binder_selftest_free_seq(alloc, front_sizes, seq, 0);
- binder_selftest_free_seq(alloc, back_sizes, seq, 0);
+ binder_selftest_free_seq(alloc, front_sizes, seq, 0,
+ end_offset[BUFFER_NUM - 1]);
+ binder_selftest_free_seq(alloc, back_sizes, seq, 0, alloc->buffer_size);
}

static void binder_selftest_alloc_offset(struct binder_alloc *alloc,
@@ -246,7 +285,8 @@ static void binder_selftest_alloc_offset(struct binder_alloc *alloc,
*
* Allocate BUFFER_NUM buffers to cover all page alignment cases,
* then free them in all orders possible. Check that pages are
- * allocated after buffer alloc and freed after freeing buffer.
+ * correctly allocated, put onto lru when buffers are freed, and
+ * are freed when binder_alloc_free_page is called.
*/
void binder_selftest_alloc(struct binder_alloc *alloc)
{
--
2.14.1.480.gb18f417b89-goog