Re: [PATCH] mm: hugetlb: fix UAF in hugetlb_handle_userfault

From: Mike Kravetz
Date: Wed Sep 21 2022 - 22:36:08 EST


On 09/21/22 17:57, John Hubbard wrote:
> On 9/21/22 16:57, Mike Kravetz wrote:
> > On 09/21/22 10:48, Mike Kravetz wrote:
> >> On 09/21/22 16:34, Liu Shixin wrote:
> >>> The vma_lock and hugetlb_fault_mutex are dropped before handling
> >>> userfault and reacquire them again after handle_userfault(), but
> >>> reacquire the vma_lock could lead to UAF[1] due to the following
> >>> race,
> >>>
> >>> hugetlb_fault
> >>> hugetlb_no_page
> >>> /*unlock vma_lock */
> >>> hugetlb_handle_userfault
> >>> handle_userfault
> >>> /* unlock mm->mmap_lock*/
> >>> vm_mmap_pgoff
> >>> do_mmap
> >>> mmap_region
> >>> munmap_vma_range
> >>> /* clean old vma */
> >>> /* lock vma_lock again <--- UAF */
> >>> /* unlock vma_lock */
> >>>
> >>> Since the vma_lock will unlock immediately after hugetlb_handle_userfault(),
> >>> let's drop the unneeded lock and unlock in hugetlb_handle_userfault() to fix
> >>> the issue.
> >>
> >> Thank you very much!
> >>
> >> When I saw this report, the obvious fix was to do something like what you have
> >> done below. That looks fine with a few minor comments.
> >>
> >> One question I have not yet answered is, "Does this same issue apply to
> >> follow_hugetlb_page()?". I believe it does. follow_hugetlb_page calls
> >> hugetlb_fault which could result in the fault being processed by userfaultfd.
> >> If we experience the race above, then the associated vma could no longer be
> >> valid when returning from hugetlb_fault. follow_hugetlb_page and callers
> >> have a flag (locked) to deal with dropping mmap lock. However, I am not sure
> >> if it is handled correctly WRT userfaultfd. I think this needs to be answered
> >> before fixing. And, if the follow_hugetlb_page code needs to be fixed it
> >> should be done at the same time.
> >>
> >
> > To at least verify this code path, I added userfaultfd handling to the gup_test
> > program in kernel selftests. When doing basic gup test on a hugetlb page in
>
> Just for those of us who are easily confused by userfaultfd cases, can you show
> what that patch is? It would help me understand this a little faster.

The ugly (just throw code at it to make it work) diff is below. All I did was
add the sample code from the userfaultfd man page.

With that change in place, I just run 'gup_test -U -z -m 2 -H'

> Actually I'm expecting that Peter can easily answer this whole thing. :)

diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c
index e43879291dac..491424d0a039 100644
--- a/tools/testing/selftests/vm/gup_test.c
+++ b/tools/testing/selftests/vm/gup_test.c
@@ -8,8 +8,11 @@
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
+#include <sys/syscall.h>
#include <pthread.h>
#include <assert.h>
+#include <poll.h>
+#include <linux/userfaultfd.h>
#include <mm/gup_test.h>
#include "../kselftest.h"

@@ -48,6 +51,75 @@ static char *cmd_to_str(unsigned long cmd)
return "Unknown command";
}

+static void *fault_handler_thread(void *arg)
+{
+ static struct uffd_msg msg; /* Data read from userfaultfd */
+ static int fault_cnt = 0; /* Number of faults so far handled */
+ long uffd;
+ static char *page = NULL;
+ struct uffdio_copy uffdio_copy;
+ ssize_t nread;
+ size_t page_size = 2 * 1024 * 1024;
+
+ uffd = (long) arg;
+
+ if (page == NULL) {
+ page = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (page == MAP_FAILED)
+ exit(1);
+ }
+
+ for (;;) {
+ struct pollfd pollfd;
+ int nready;
+ pollfd.fd = uffd;
+ pollfd.events = POLLIN;
+ nready = poll(&pollfd, 1, -1);
+ if (nready == -1)
+ exit(1);
+
+ printf("\nfault_handler_thread():\n");
+ printf(" poll() returns: nready = %d; "
+ "POLLIN = %d; POLLERR = %d\n", nready,
+ (pollfd.revents & POLLIN) != 0,
+ (pollfd.revents & POLLERR) != 0);
+
+ nread = read(uffd, &msg, sizeof(msg));
+ if (nread == 0) {
+ printf("EOF on userfaultfd!\n");
+ exit(1);
+ }
+
+ if (nread == -1) {
+ printf("nread == -1\n");
+ exit(1);
+ }
+
+ if (msg.event != UFFD_EVENT_PAGEFAULT) {
+ printf("Unexpected event on userfaultfd\n");
+ exit(1);
+ }
+
+ printf(" UFFD_EVENT_PAGEFAULT event: ");
+ printf("flags = %llx; ", msg.arg.pagefault.flags);
+ printf("address = %llx\n", msg.arg.pagefault.address);
+
+ memset(page, 'A' + fault_cnt % 20, page_size);
+ fault_cnt++;
+
+ uffdio_copy.src = (unsigned long) page;
+ uffdio_copy.dst = (unsigned long) msg.arg.pagefault.address &
+ ~(page_size - 1);
+ uffdio_copy.len = page_size;
+ uffdio_copy.mode = 0;
+ uffdio_copy.copy = 0;
+ if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1)
+ exit(1);
+
+ }
+}
+
void *gup_thread(void *data)
{
struct gup_test gup = *(struct gup_test *)data;
@@ -94,7 +166,11 @@ int main(int argc, char **argv)
int flags = MAP_PRIVATE, touch = 0;
char *file = "/dev/zero";
pthread_t *tid;
+ pthread_t thr;
char *p;
+ long uffd;
+ struct uffdio_api uffdio_api;
+ struct uffdio_register uffdio_register;

while ((opt = getopt(argc, argv, "m:r:n:F:f:abcj:tTLUuwWSHpz")) != -1) {
switch (opt) {
@@ -230,6 +306,18 @@ int main(int argc, char **argv)
exit(KSFT_SKIP);
}

+ uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+ if (uffd == -1) {
+ perror("uffd open");
+ exit(1);
+ }
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = 0;
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) {
+ perror("uffd ioctl API");
+ exit(1);
+ }
+
p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0);
if (p == MAP_FAILED) {
perror("mmap");
@@ -237,6 +325,20 @@ int main(int argc, char **argv)
}
gup.addr = (unsigned long)p;

+ uffdio_register.range.start = (unsigned long)p;
+ uffdio_register.range.len = size;
+ uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) {
+ perror("uffd ioctl API");
+ exit(1);
+ }
+ ret = pthread_create(&thr, NULL, fault_handler_thread, (void *)uffd);
+ if (ret) {
+ exit(1);
+ }
+
+ sleep(5);
+
if (thp == 1)
madvise(p, size, MADV_HUGEPAGE);
else if (thp == 0)
--
Mike Kravetz