Re: possible deadlock in __do_page_fault

From: Dmitry Vyukov
Date: Tue Jan 22 2019 - 08:53:02 EST


On Tue, Jan 22, 2019 at 11:32 AM Tetsuo Handa
<penguin-kernel@xxxxxxxxxxxxxxxxxxx> wrote:
>
> On 2019/01/22 19:12, Dmitry Vyukov wrote:
> > On Tue, Jan 22, 2019 at 11:02 AM Tetsuo Handa
> > <penguin-kernel@xxxxxxxxxxxxxxxxxxx> wrote:
> >>
> >> On 2018/09/22 8:21, Andrew Morton wrote:
> >>> On Thu, 20 Sep 2018 19:33:15 -0400 Joel Fernandes <joel@xxxxxxxxxxxxxxxxx> wrote:
> >>>
> >>>> On Thu, Sep 20, 2018 at 5:12 PM Todd Kjos <tkjos@xxxxxxxxxx> wrote:
> >>>>>
> >>>>> +Joel Fernandes
> >>>>>
> >>>>> On Thu, Sep 20, 2018 at 2:11 PM Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> wrote:
> >>>>>>
> >>>>>>
> >>>>>> Thanks. Let's cc the ashmem folks.
> >>>>>>
> >>>>
> >>>> This should be fixed by https://patchwork.kernel.org/patch/10572477/
> >>>>
> >>>> It has Neil Brown's Reviewed-by, but it doesn't look like it has
> >>>> appeared in anyone's tree yet. Could Greg take this patch?
> >>>
> >>> All is well. That went into mainline yesterday, with a cc:stable.
> >>>
> >>
> >> This problem was not fixed at all.
> >
> > There are at least 2 other open deadlocks involving ashmem:
>
> Yes, they involve the ashmem_shrink_scan() => {shmem|vfs}_fallocate() sequence.
> This approach tries to eliminate that sequence.
>
> >
> > https://syzkaller.appspot.com/bug?extid=148c2885d71194f18d28
> > https://syzkaller.appspot.com/bug?extid=4b8b031b89e6b96c4b2e
> >
> > Does this fix any of these too?
>
> I need the ashmem folks to check whether this approach is possible/correct.
> But you can ask syzbot to test this patch before the ashmem folks respond.

Right. Let's do this.
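
To spell out the inversion behind these reports: ashmem_shrink_scan() calls
{shmem|vfs}_fallocate() with ashmem_mutex held (the lines this patch removes),
so that path takes ashmem_mutex before the inode/mm side locks, while
ashmem_mmap() runs with mmap_sem already held and then takes ashmem_mutex.
Below is a self-contained userspace analogy of that ABBA ordering, with
pthread mutexes standing in for the kernel locks; it is only an illustration,
not the kernel code:

/* abba.c - build with "gcc -pthread abba.c". The two threads take the same
 * pair of locks in opposite order, so the program will (almost always) hang.
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t ashmem_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t mmap_sem = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for ashmem_shrink_scan(): driver lock first, then the fs/mm side. */
static void *shrinker(void *arg)
{
	pthread_mutex_lock(&ashmem_mutex);
	usleep(1000);			/* widen the race window */
	pthread_mutex_lock(&mmap_sem);	/* "fallocate() => inode/mm locks" */
	pthread_mutex_unlock(&mmap_sem);
	pthread_mutex_unlock(&ashmem_mutex);
	return NULL;
}

/* Stand-in for mmap(): mm lock first, then the driver lock. */
static void *mapper(void *arg)
{
	pthread_mutex_lock(&mmap_sem);
	usleep(1000);
	pthread_mutex_lock(&ashmem_mutex);	/* "ashmem_mmap() under mmap_sem" */
	pthread_mutex_unlock(&ashmem_mutex);
	pthread_mutex_unlock(&mmap_sem);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, shrinker, NULL);
	pthread_create(&b, NULL, mapper, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	printf("no deadlock this time\n");	/* only reached if the race is missed */
	return 0;
}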

As with any kernel patch, only you really know how to apply it; the git
tree/base commit info is missing, so let's do the usual guessing and
finger-crossing:

#syz test: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master
diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c
index 90a8a9f1ac7d..1a890c43a10a 100644
--- a/drivers/staging/android/ashmem.c
+++ b/drivers/staging/android/ashmem.c
@@ -75,6 +75,17 @@ struct ashmem_range {
/* LRU list of unpinned pages, protected by ashmem_mutex */
static LIST_HEAD(ashmem_lru_list);

+static struct workqueue_struct *ashmem_wq;
+static atomic_t ashmem_shrink_inflight = ATOMIC_INIT(0);
+static DECLARE_WAIT_QUEUE_HEAD(ashmem_shrink_wait);
+
+struct ashmem_shrink_work {
+	struct work_struct work;
+	struct file *file;
+	loff_t start;
+	loff_t end;
+};
+
/*
* long lru_count - The count of pages on our LRU list.
*
@@ -292,6 +303,7 @@ static ssize_t ashmem_read_iter(struct kiocb *iocb, struct iov_iter *iter)
int ret = 0;

mutex_lock(&ashmem_mutex);
+	wait_event(ashmem_shrink_wait, !atomic_read(&ashmem_shrink_inflight));

/* If size is not set, or set to 0, always return EOF. */
if (asma->size == 0)
@@ -359,6 +371,7 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma)
int ret = 0;

mutex_lock(&ashmem_mutex);
+	wait_event(ashmem_shrink_wait, !atomic_read(&ashmem_shrink_inflight));

/* user needs to SET_SIZE before mapping */
if (!asma->size) {
@@ -421,6 +434,19 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma)
return ret;
}

+static void ashmem_shrink_worker(struct work_struct *work)
+{
+	struct ashmem_shrink_work *w = container_of(work, typeof(*w), work);
+
+	w->file->f_op->fallocate(w->file,
+				 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+				 w->start, w->end - w->start);
+	fput(w->file);
+	kfree(w);
+	if (atomic_dec_and_test(&ashmem_shrink_inflight))
+		wake_up_all(&ashmem_shrink_wait);
+}
+
/*
* ashmem_shrink - our cache shrinker, called from mm/vmscan.c
*
@@ -449,12 +475,18 @@ ashmem_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
return -1;

list_for_each_entry_safe(range, next, &ashmem_lru_list, lru) {
-		loff_t start = range->pgstart * PAGE_SIZE;
-		loff_t end = (range->pgend + 1) * PAGE_SIZE;
+		struct ashmem_shrink_work *w = kzalloc(sizeof(*w), GFP_ATOMIC);
+
+		if (!w)
+			break;
+		INIT_WORK(&w->work, ashmem_shrink_worker);
+		w->file = range->asma->file;
+		get_file(w->file);
+		w->start = range->pgstart * PAGE_SIZE;
+		w->end = (range->pgend + 1) * PAGE_SIZE;
+		atomic_inc(&ashmem_shrink_inflight);
+		queue_work(ashmem_wq, &w->work);

-		range->asma->file->f_op->fallocate(range->asma->file,
-				FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
-				start, end - start);
range->purged = ASHMEM_WAS_PURGED;
lru_del(range);

@@ -713,6 +745,7 @@ static int ashmem_pin_unpin(struct ashmem_area *asma, unsigned long cmd,
return -EFAULT;

mutex_lock(&ashmem_mutex);
+	wait_event(ashmem_shrink_wait, !atomic_read(&ashmem_shrink_inflight));

if (!asma->file)
goto out_unlock;
@@ -883,8 +916,15 @@ static int __init ashmem_init(void)
goto out_free2;
}

+	ashmem_wq = alloc_workqueue("ashmem_wq", WQ_MEM_RECLAIM, 0);
+	if (!ashmem_wq) {
+		pr_err("failed to create workqueue\n");
+		goto out_demisc;
+	}
+
ret = register_shrinker(&ashmem_shrinker);
if (ret) {
+		destroy_workqueue(ashmem_wq);
pr_err("failed to register shrinker!\n");
goto out_demisc;
}
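
For completeness, here is a minimal userspace sketch of how ranges end up on
the LRU list that ashmem_shrink_scan() walks: set a size, mmap the fd (which
creates the backing shmem file), touch the pages, then unpin a range. The
ioctl numbers and struct ashmem_pin below mirror
drivers/staging/android/uapi/ashmem.h as far as I recall, so double-check
them against your tree; the file name is just an example.

/* ashmem-unpin.c: unpin a whole ashmem region so the shrinker may purge it. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

struct ashmem_pin {
	uint32_t offset;	/* offset into the region, page-aligned */
	uint32_t len;		/* length, page-aligned; 0 means "to the end" */
};

#define __ASHMEMIOC		0x77
#define ASHMEM_SET_SIZE		_IOW(__ASHMEMIOC, 3, size_t)
#define ASHMEM_UNPIN		_IOW(__ASHMEMIOC, 8, struct ashmem_pin)

int main(void)
{
	size_t size = 16 * 4096;
	struct ashmem_pin pin = { .offset = 0, .len = 0 };	/* whole region */
	void *p;
	int fd = open("/dev/ashmem", O_RDWR);

	if (fd < 0) {
		perror("open /dev/ashmem");
		return 1;
	}
	if (ioctl(fd, ASHMEM_SET_SIZE, size))		/* must happen before mmap */
		perror("ASHMEM_SET_SIZE");
	p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	memset(p, 0xaa, size);				/* instantiate shmem pages */
	if (ioctl(fd, ASHMEM_UNPIN, &pin))		/* range moves to ashmem_lru_list */
		perror("ASHMEM_UNPIN");
	/* Under memory pressure, ashmem_shrink_scan() may now purge the range;
	 * with the patch above, the hole punching runs from the workqueue. */
	munmap(p, size);
	close(fd);
	return 0;
}

With the patch, the hole punching is queued on ashmem_wq (WQ_MEM_RECLAIM,
presumably because the work is queued from reclaim), and the read/mmap/pin
paths wait under ashmem_mutex for in-flight shrink work to drain via
ashmem_shrink_wait, so fallocate() no longer runs with ashmem_mutex held.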