Re: [PATCH v7 03/19] gup: Turn fault_in_pages_{readable,writeable} into fault_in_{readable,writeable}

From: Andreas Gruenbacher
Date: Tue Sep 28 2021 - 11:03:05 EST


On Fri, Sep 3, 2021 at 4:57 PM Filipe Manana <fdmanana@xxxxxxxxx> wrote:
> On Fri, Aug 27, 2021 at 5:52 PM Andreas Gruenbacher <agruenba@xxxxxxxxxx> wrote:
> >
> > Turn fault_in_pages_{readable,writeable} into versions that return the
> > number of bytes not faulted in (similar to copy_to_user) instead of
> > returning a non-zero value when any of the requested pages couldn't be
> > faulted in. This supports the existing users that require all pages to
> > be faulted in as well as new users that are happy if any pages can be
> > faulted in at all.
> >
> > Neither of these functions is entirely trivial and it doesn't seem
> > useful to inline them, so move them to mm/gup.c.
> >
> > Rename the functions to fault_in_{readable,writeable} to make sure that
> > this change doesn't silently break things.
> >
> > Signed-off-by: Andreas Gruenbacher <agruenba@xxxxxxxxxx>
> > ---
> > arch/powerpc/kernel/kvm.c | 3 +-
> > arch/powerpc/kernel/signal_32.c | 4 +-
> > arch/powerpc/kernel/signal_64.c | 2 +-
> > arch/x86/kernel/fpu/signal.c | 7 ++-
> > drivers/gpu/drm/armada/armada_gem.c | 7 ++-
> > fs/btrfs/ioctl.c | 5 +-
> > include/linux/pagemap.h | 57 ++---------------------
> > lib/iov_iter.c | 10 ++--
> > mm/filemap.c | 2 +-
> > mm/gup.c | 72 +++++++++++++++++++++++++++++
> > 10 files changed, 93 insertions(+), 76 deletions(-)
> >
> > diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
> > index d89cf802d9aa..6568823cf306 100644
> > --- a/arch/powerpc/kernel/kvm.c
> > +++ b/arch/powerpc/kernel/kvm.c
> > @@ -669,7 +669,8 @@ static void __init kvm_use_magic_page(void)
> > on_each_cpu(kvm_map_magic_page, &features, 1);
> >
> > /* Quick self-test to see if the mapping works */
> > - if (fault_in_pages_readable((const char *)KVM_MAGIC_PAGE, sizeof(u32))) {
> > + if (fault_in_readable((const char __user *)KVM_MAGIC_PAGE,
> > + sizeof(u32))) {
> > kvm_patching_worked = false;
> > return;
> > }
> > diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
> > index 0608581967f0..38c3eae40c14 100644
> > --- a/arch/powerpc/kernel/signal_32.c
> > +++ b/arch/powerpc/kernel/signal_32.c
> > @@ -1048,7 +1048,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
> > if (new_ctx == NULL)
> > return 0;
> > if (!access_ok(new_ctx, ctx_size) ||
> > - fault_in_pages_readable((u8 __user *)new_ctx, ctx_size))
> > + fault_in_readable((char __user *)new_ctx, ctx_size))
> > return -EFAULT;
> >
> > /*
> > @@ -1237,7 +1237,7 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx,
> > #endif
> >
> > if (!access_ok(ctx, sizeof(*ctx)) ||
> > - fault_in_pages_readable((u8 __user *)ctx, sizeof(*ctx)))
> > + fault_in_readable((char __user *)ctx, sizeof(*ctx)))
> > return -EFAULT;
> >
> > /*
> > diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
> > index 1831bba0582e..9f471b4a11e3 100644
> > --- a/arch/powerpc/kernel/signal_64.c
> > +++ b/arch/powerpc/kernel/signal_64.c
> > @@ -688,7 +688,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
> > if (new_ctx == NULL)
> > return 0;
> > if (!access_ok(new_ctx, ctx_size) ||
> > - fault_in_pages_readable((u8 __user *)new_ctx, ctx_size))
> > + fault_in_readable((char __user *)new_ctx, ctx_size))
> > return -EFAULT;
> >
> > /*
> > diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
> > index 445c57c9c539..ba6bdec81603 100644
> > --- a/arch/x86/kernel/fpu/signal.c
> > +++ b/arch/x86/kernel/fpu/signal.c
> > @@ -205,7 +205,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
> > fpregs_unlock();
> >
> > if (ret) {
> > - if (!fault_in_pages_writeable(buf_fx, fpu_user_xstate_size))
> > + if (!fault_in_writeable(buf_fx, fpu_user_xstate_size))
> > goto retry;
> > return -EFAULT;
> > }
> > @@ -278,10 +278,9 @@ static int restore_fpregs_from_user(void __user *buf, u64 xrestore,
> > if (ret != -EFAULT)
> > return -EINVAL;
> >
> > - ret = fault_in_pages_readable(buf, size);
> > - if (!ret)
> > + if (!fault_in_readable(buf, size))
> > goto retry;
> > - return ret;
> > + return -EFAULT;
> > }
> >
> > /*
> > diff --git a/drivers/gpu/drm/armada/armada_gem.c b/drivers/gpu/drm/armada/armada_gem.c
> > index 21909642ee4c..8fbb25913327 100644
> > --- a/drivers/gpu/drm/armada/armada_gem.c
> > +++ b/drivers/gpu/drm/armada/armada_gem.c
> > @@ -336,7 +336,7 @@ int armada_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> > struct drm_armada_gem_pwrite *args = data;
> > struct armada_gem_object *dobj;
> > char __user *ptr;
> > - int ret;
> > + int ret = 0;
> >
> > DRM_DEBUG_DRIVER("handle %u off %u size %u ptr 0x%llx\n",
> > args->handle, args->offset, args->size, args->ptr);
> > @@ -349,9 +349,8 @@ int armada_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> > if (!access_ok(ptr, args->size))
> > return -EFAULT;
> >
> > - ret = fault_in_pages_readable(ptr, args->size);
> > - if (ret)
> > - return ret;
> > + if (fault_in_readable(ptr, args->size))
> > + return -EFAULT;
> >
> > dobj = armada_gem_object_lookup(file, args->handle);
> > if (dobj == NULL)
> > diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> > index 0ba98e08a029..9233ecc31e2e 100644
> > --- a/fs/btrfs/ioctl.c
> > +++ b/fs/btrfs/ioctl.c
> > @@ -2244,9 +2244,8 @@ static noinline int search_ioctl(struct inode *inode,
> > key.offset = sk->min_offset;
> >
> > while (1) {
> > - ret = fault_in_pages_writeable(ubuf + sk_offset,
> > - *buf_size - sk_offset);
> > - if (ret)
> > + ret = -EFAULT;
> > + if (fault_in_writeable(ubuf + sk_offset, *buf_size - sk_offset))
> > break;
> >
> > ret = btrfs_search_forward(root, &key, path, sk->min_transid);
> > diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> > index ed02aa522263..7c9edc9694d9 100644
> > --- a/include/linux/pagemap.h
> > +++ b/include/linux/pagemap.h
> > @@ -734,61 +734,10 @@ int wait_on_page_private_2_killable(struct page *page);
> > extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter);
> >
> > /*
> > - * Fault everything in given userspace address range in.
> > + * Fault in userspace address range.
> > */
> > -static inline int fault_in_pages_writeable(char __user *uaddr, int size)
> > -{
> > - char __user *end = uaddr + size - 1;
> > -
> > - if (unlikely(size == 0))
> > - return 0;
> > -
> > - if (unlikely(uaddr > end))
> > - return -EFAULT;
> > - /*
> > - * Writing zeroes into userspace here is OK, because we know that if
> > - * the zero gets there, we'll be overwriting it.
> > - */
> > - do {
> > - if (unlikely(__put_user(0, uaddr) != 0))
> > - return -EFAULT;
> > - uaddr += PAGE_SIZE;
> > - } while (uaddr <= end);
> > -
> > - /* Check whether the range spilled into the next page. */
> > - if (((unsigned long)uaddr & PAGE_MASK) ==
> > - ((unsigned long)end & PAGE_MASK))
> > - return __put_user(0, end);
> > -
> > - return 0;
> > -}
> > -
> > -static inline int fault_in_pages_readable(const char __user *uaddr, int size)
> > -{
> > - volatile char c;
> > - const char __user *end = uaddr + size - 1;
> > -
> > - if (unlikely(size == 0))
> > - return 0;
> > -
> > - if (unlikely(uaddr > end))
> > - return -EFAULT;
> > -
> > - do {
> > - if (unlikely(__get_user(c, uaddr) != 0))
> > - return -EFAULT;
> > - uaddr += PAGE_SIZE;
> > - } while (uaddr <= end);
> > -
> > - /* Check whether the range spilled into the next page. */
> > - if (((unsigned long)uaddr & PAGE_MASK) ==
> > - ((unsigned long)end & PAGE_MASK)) {
> > - return __get_user(c, end);
> > - }
> > -
> > - (void)c;
> > - return 0;
> > -}
> > +size_t fault_in_writeable(char __user *uaddr, size_t size);
> > +size_t fault_in_readable(const char __user *uaddr, size_t size);
> >
> > int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
> > pgoff_t index, gfp_t gfp_mask);
> > diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> > index 25dfc48536d7..069cedd9d7b4 100644
> > --- a/lib/iov_iter.c
> > +++ b/lib/iov_iter.c
> > @@ -191,7 +191,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b
> > buf = iov->iov_base + skip;
> > copy = min(bytes, iov->iov_len - skip);
> >
> > - if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
> > + if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_writeable(buf, copy)) {
> > kaddr = kmap_atomic(page);
> > from = kaddr + offset;
> >
> > @@ -275,7 +275,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t
> > buf = iov->iov_base + skip;
> > copy = min(bytes, iov->iov_len - skip);
> >
> > - if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
> > + if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_readable(buf, copy)) {
> > kaddr = kmap_atomic(page);
> > to = kaddr + offset;
> >
> > @@ -446,13 +446,11 @@ int iov_iter_fault_in_readable(const struct iov_iter *i, size_t bytes)
> > bytes = i->count;
> > for (p = i->iov, skip = i->iov_offset; bytes; p++, skip = 0) {
> > size_t len = min(bytes, p->iov_len - skip);
> > - int err;
> >
> > if (unlikely(!len))
> > continue;
> > - err = fault_in_pages_readable(p->iov_base + skip, len);
> > - if (unlikely(err))
> > - return err;
> > + if (fault_in_readable(p->iov_base + skip, len))
> > + return -EFAULT;
> > bytes -= len;
> > }
> > }
> > diff --git a/mm/filemap.c b/mm/filemap.c
> > index d1458ecf2f51..4dec3bc7752e 100644
> > --- a/mm/filemap.c
> > +++ b/mm/filemap.c
> > @@ -88,7 +88,7 @@
> > * ->lock_page (access_process_vm)
> > *
> > * ->i_mutex (generic_perform_write)
> > - * ->mmap_lock (fault_in_pages_readable->do_page_fault)
> > + * ->mmap_lock (fault_in_readable->do_page_fault)
> > *
> > * bdi->wb.list_lock
> > * sb_lock (fs/fs-writeback.c)
> > diff --git a/mm/gup.c b/mm/gup.c
> > index b94717977d17..0cf47955e5a1 100644
> > --- a/mm/gup.c
> > +++ b/mm/gup.c
> > @@ -1672,6 +1672,78 @@ static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start,
> > }
> > #endif /* !CONFIG_MMU */
> >
> > +/**
> > + * fault_in_writeable - fault in userspace address range for writing
> > + * @uaddr: start of address range
> > + * @size: size of address range
> > + *
> > + * Returns the number of bytes not faulted in (like copy_to_user() and
> > + * copy_from_user()).
> > + */
> > +size_t fault_in_writeable(char __user *uaddr, size_t size)
> > +{
> > + char __user *start = uaddr, *end;
> > +
> > + if (unlikely(size == 0))
> > + return 0;
> > + if (!PAGE_ALIGNED(uaddr)) {
> > + if (unlikely(__put_user(0, uaddr) != 0))
> > + return size;
> > + uaddr = (char __user *)PAGE_ALIGN((unsigned long)uaddr);
> > + }
> > + end = (char __user *)PAGE_ALIGN((unsigned long)start + size);
> > + if (unlikely(end < start))
> > + end = NULL;
> > + while (uaddr != end) {
> > + if (unlikely(__put_user(0, uaddr) != 0))
> > + goto out;
> > + uaddr += PAGE_SIZE;
>
> Won't we loop endlessly or corrupt some unwanted page when 'end' was
> set to NULL?

What do you mean? We set 'end' to NULL when start + size < start
exactly so that the loop will stop when uaddr wraps around.

> > + }
> > +
> > +out:
> > + if (size > uaddr - start)
> > + return size - (uaddr - start);
> > + return 0;
> > +}
> > +EXPORT_SYMBOL(fault_in_writeable);
> > +
> > +/**
> > + * fault_in_readable - fault in userspace address range for reading
> > + * @uaddr: start of user address range
> > + * @size: size of user address range
> > + *
> > + * Returns the number of bytes not faulted in (like copy_to_user() and
> > + * copy_from_user()).
> > + */
> > +size_t fault_in_readable(const char __user *uaddr, size_t size)
> > +{
> > + const char __user *start = uaddr, *end;
> > + volatile char c;
> > +
> > + if (unlikely(size == 0))
> > + return 0;
> > + if (!PAGE_ALIGNED(uaddr)) {
> > + if (unlikely(__get_user(c, uaddr) != 0))
> > + return size;
> > + uaddr = (const char __user *)PAGE_ALIGN((unsigned long)uaddr);
> > + }
> > + end = (const char __user *)PAGE_ALIGN((unsigned long)start + size);
> > + if (unlikely(end < start))
> > + end = NULL;
> > + while (uaddr != end) {
>
> Same kind of issue here, when 'end' was set to NULL?
>
> Thanks.
>
> > + if (unlikely(__get_user(c, uaddr) != 0))
> > + goto out;
> > + uaddr += PAGE_SIZE;
> > + }
> > +
> > +out:
> > + (void)c;
> > + if (size > uaddr - start)
> > + return size - (uaddr - start);
> > + return 0;
> > +}
> > +EXPORT_SYMBOL(fault_in_readable);
> > +
> > /**
> > * get_dump_page() - pin user page in memory while writing it to core dump
> > * @addr: user address
> > --
> > 2.26.3
> >

Thanks,
Andreas