Re: [PATCH v7 03/19] gup: Turn fault_in_pages_{readable,writeable} into fault_in_{readable,writeable}

From: Filipe Manana
Date: Fri Sep 03 2021 - 10:57:08 EST


On Fri, Aug 27, 2021 at 5:52 PM Andreas Gruenbacher <agruenba@xxxxxxxxxx> wrote:
>
> Turn fault_in_pages_{readable,writeable} into versions that return the
> number of bytes not faulted in (similar to copy_to_user) instead of
> returning a non-zero value when any of the requested pages couldn't be
> faulted in. This supports the existing users that require all pages to
> be faulted in as well as new users that are happy if any pages can be
> faulted in at all.
>
> Neither of these functions is entirely trivial and it doesn't seem
> useful to inline them, so move them to mm/gup.c.
>
> Rename the functions to fault_in_{readable,writeable} to make sure that
> this change doesn't silently break things.
>
> Signed-off-by: Andreas Gruenbacher <agruenba@xxxxxxxxxx>
> ---
> arch/powerpc/kernel/kvm.c | 3 +-
> arch/powerpc/kernel/signal_32.c | 4 +-
> arch/powerpc/kernel/signal_64.c | 2 +-
> arch/x86/kernel/fpu/signal.c | 7 ++-
> drivers/gpu/drm/armada/armada_gem.c | 7 ++-
> fs/btrfs/ioctl.c | 5 +-
> include/linux/pagemap.h | 57 ++---------------------
> lib/iov_iter.c | 10 ++--
> mm/filemap.c | 2 +-
> mm/gup.c | 72 +++++++++++++++++++++++++++++
> 10 files changed, 93 insertions(+), 76 deletions(-)
>
> diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
> index d89cf802d9aa..6568823cf306 100644
> --- a/arch/powerpc/kernel/kvm.c
> +++ b/arch/powerpc/kernel/kvm.c
> @@ -669,7 +669,8 @@ static void __init kvm_use_magic_page(void)
> on_each_cpu(kvm_map_magic_page, &features, 1);
>
> /* Quick self-test to see if the mapping works */
> - if (fault_in_pages_readable((const char *)KVM_MAGIC_PAGE, sizeof(u32))) {
> + if (fault_in_readable((const char __user *)KVM_MAGIC_PAGE,
> + sizeof(u32))) {
> kvm_patching_worked = false;
> return;
> }
> diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
> index 0608581967f0..38c3eae40c14 100644
> --- a/arch/powerpc/kernel/signal_32.c
> +++ b/arch/powerpc/kernel/signal_32.c
> @@ -1048,7 +1048,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
> if (new_ctx == NULL)
> return 0;
> if (!access_ok(new_ctx, ctx_size) ||
> - fault_in_pages_readable((u8 __user *)new_ctx, ctx_size))
> + fault_in_readable((char __user *)new_ctx, ctx_size))
> return -EFAULT;
>
> /*
> @@ -1237,7 +1237,7 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx,
> #endif
>
> if (!access_ok(ctx, sizeof(*ctx)) ||
> - fault_in_pages_readable((u8 __user *)ctx, sizeof(*ctx)))
> + fault_in_readable((char __user *)ctx, sizeof(*ctx)))
> return -EFAULT;
>
> /*
> diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
> index 1831bba0582e..9f471b4a11e3 100644
> --- a/arch/powerpc/kernel/signal_64.c
> +++ b/arch/powerpc/kernel/signal_64.c
> @@ -688,7 +688,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
> if (new_ctx == NULL)
> return 0;
> if (!access_ok(new_ctx, ctx_size) ||
> - fault_in_pages_readable((u8 __user *)new_ctx, ctx_size))
> + fault_in_readable((char __user *)new_ctx, ctx_size))
> return -EFAULT;
>
> /*
> diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
> index 445c57c9c539..ba6bdec81603 100644
> --- a/arch/x86/kernel/fpu/signal.c
> +++ b/arch/x86/kernel/fpu/signal.c
> @@ -205,7 +205,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
> fpregs_unlock();
>
> if (ret) {
> - if (!fault_in_pages_writeable(buf_fx, fpu_user_xstate_size))
> + if (!fault_in_writeable(buf_fx, fpu_user_xstate_size))
> goto retry;
> return -EFAULT;
> }
> @@ -278,10 +278,9 @@ static int restore_fpregs_from_user(void __user *buf, u64 xrestore,
> if (ret != -EFAULT)
> return -EINVAL;
>
> - ret = fault_in_pages_readable(buf, size);
> - if (!ret)
> + if (!fault_in_readable(buf, size))
> goto retry;
> - return ret;
> + return -EFAULT;
> }
>
> /*
> diff --git a/drivers/gpu/drm/armada/armada_gem.c b/drivers/gpu/drm/armada/armada_gem.c
> index 21909642ee4c..8fbb25913327 100644
> --- a/drivers/gpu/drm/armada/armada_gem.c
> +++ b/drivers/gpu/drm/armada/armada_gem.c
> @@ -336,7 +336,7 @@ int armada_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> struct drm_armada_gem_pwrite *args = data;
> struct armada_gem_object *dobj;
> char __user *ptr;
> - int ret;
> + int ret = 0;
>
> DRM_DEBUG_DRIVER("handle %u off %u size %u ptr 0x%llx\n",
> args->handle, args->offset, args->size, args->ptr);
> @@ -349,9 +349,8 @@ int armada_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> if (!access_ok(ptr, args->size))
> return -EFAULT;
>
> - ret = fault_in_pages_readable(ptr, args->size);
> - if (ret)
> - return ret;
> + if (fault_in_readable(ptr, args->size))
> + return -EFAULT;
>
> dobj = armada_gem_object_lookup(file, args->handle);
> if (dobj == NULL)
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 0ba98e08a029..9233ecc31e2e 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -2244,9 +2244,8 @@ static noinline int search_ioctl(struct inode *inode,
> key.offset = sk->min_offset;
>
> while (1) {
> - ret = fault_in_pages_writeable(ubuf + sk_offset,
> - *buf_size - sk_offset);
> - if (ret)
> + ret = -EFAULT;
> + if (fault_in_writeable(ubuf + sk_offset, *buf_size - sk_offset))
> break;
>
> ret = btrfs_search_forward(root, &key, path, sk->min_transid);
> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> index ed02aa522263..7c9edc9694d9 100644
> --- a/include/linux/pagemap.h
> +++ b/include/linux/pagemap.h
> @@ -734,61 +734,10 @@ int wait_on_page_private_2_killable(struct page *page);
> extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter);
>
> /*
> - * Fault everything in given userspace address range in.
> + * Fault in userspace address range.
> */
> -static inline int fault_in_pages_writeable(char __user *uaddr, int size)
> -{
> - char __user *end = uaddr + size - 1;
> -
> - if (unlikely(size == 0))
> - return 0;
> -
> - if (unlikely(uaddr > end))
> - return -EFAULT;
> - /*
> - * Writing zeroes into userspace here is OK, because we know that if
> - * the zero gets there, we'll be overwriting it.
> - */
> - do {
> - if (unlikely(__put_user(0, uaddr) != 0))
> - return -EFAULT;
> - uaddr += PAGE_SIZE;
> - } while (uaddr <= end);
> -
> - /* Check whether the range spilled into the next page. */
> - if (((unsigned long)uaddr & PAGE_MASK) ==
> - ((unsigned long)end & PAGE_MASK))
> - return __put_user(0, end);
> -
> - return 0;
> -}
> -
> -static inline int fault_in_pages_readable(const char __user *uaddr, int size)
> -{
> - volatile char c;
> - const char __user *end = uaddr + size - 1;
> -
> - if (unlikely(size == 0))
> - return 0;
> -
> - if (unlikely(uaddr > end))
> - return -EFAULT;
> -
> - do {
> - if (unlikely(__get_user(c, uaddr) != 0))
> - return -EFAULT;
> - uaddr += PAGE_SIZE;
> - } while (uaddr <= end);
> -
> - /* Check whether the range spilled into the next page. */
> - if (((unsigned long)uaddr & PAGE_MASK) ==
> - ((unsigned long)end & PAGE_MASK)) {
> - return __get_user(c, end);
> - }
> -
> - (void)c;
> - return 0;
> -}
> +size_t fault_in_writeable(char __user *uaddr, size_t size);
> +size_t fault_in_readable(const char __user *uaddr, size_t size);
>
> int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
> pgoff_t index, gfp_t gfp_mask);
> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> index 25dfc48536d7..069cedd9d7b4 100644
> --- a/lib/iov_iter.c
> +++ b/lib/iov_iter.c
> @@ -191,7 +191,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b
> buf = iov->iov_base + skip;
> copy = min(bytes, iov->iov_len - skip);
>
> - if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
> + if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_writeable(buf, copy)) {
> kaddr = kmap_atomic(page);
> from = kaddr + offset;
>
> @@ -275,7 +275,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t
> buf = iov->iov_base + skip;
> copy = min(bytes, iov->iov_len - skip);
>
> - if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
> + if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_readable(buf, copy)) {
> kaddr = kmap_atomic(page);
> to = kaddr + offset;
>
> @@ -446,13 +446,11 @@ int iov_iter_fault_in_readable(const struct iov_iter *i, size_t bytes)
> bytes = i->count;
> for (p = i->iov, skip = i->iov_offset; bytes; p++, skip = 0) {
> size_t len = min(bytes, p->iov_len - skip);
> - int err;
>
> if (unlikely(!len))
> continue;
> - err = fault_in_pages_readable(p->iov_base + skip, len);
> - if (unlikely(err))
> - return err;
> + if (fault_in_readable(p->iov_base + skip, len))
> + return -EFAULT;
> bytes -= len;
> }
> }
> diff --git a/mm/filemap.c b/mm/filemap.c
> index d1458ecf2f51..4dec3bc7752e 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -88,7 +88,7 @@
> * ->lock_page (access_process_vm)
> *
> * ->i_mutex (generic_perform_write)
> - * ->mmap_lock (fault_in_pages_readable->do_page_fault)
> + * ->mmap_lock (fault_in_readable->do_page_fault)
> *
> * bdi->wb.list_lock
> * sb_lock (fs/fs-writeback.c)
> diff --git a/mm/gup.c b/mm/gup.c
> index b94717977d17..0cf47955e5a1 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -1672,6 +1672,78 @@ static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start,
> }
> #endif /* !CONFIG_MMU */
>
> +/**
> + * fault_in_writeable - fault in userspace address range for writing
> + * @uaddr: start of address range
> + * @size: size of address range
> + *
> + * Returns the number of bytes not faulted in (like copy_to_user() and
> + * copy_from_user()).
> + */
> +size_t fault_in_writeable(char __user *uaddr, size_t size)
> +{
> + char __user *start = uaddr, *end;
> +
> + if (unlikely(size == 0))
> + return 0;
> + if (!PAGE_ALIGNED(uaddr)) {
> + if (unlikely(__put_user(0, uaddr) != 0))
> + return size;
> + uaddr = (char __user *)PAGE_ALIGN((unsigned long)uaddr);
> + }
> + end = (char __user *)PAGE_ALIGN((unsigned long)start + size);
> + if (unlikely(end < start))
> + end = NULL;
> + while (uaddr != end) {
> + if (unlikely(__put_user(0, uaddr) != 0))
> + goto out;
> + uaddr += PAGE_SIZE;

Won't we loop endlessly or corrupt some unwanted page when 'end' was
set to NULL?

> + }
> +
> +out:
> + if (size > uaddr - start)
> + return size - (uaddr - start);
> + return 0;
> +}
> +EXPORT_SYMBOL(fault_in_writeable);
> +
> +/**
> + * fault_in_readable - fault in userspace address range for reading
> + * @uaddr: start of user address range
> + * @size: size of user address range
> + *
> + * Returns the number of bytes not faulted in (like copy_to_user() and
> + * copy_from_user()).
> + */
> +size_t fault_in_readable(const char __user *uaddr, size_t size)
> +{
> + const char __user *start = uaddr, *end;
> + volatile char c;
> +
> + if (unlikely(size == 0))
> + return 0;
> + if (!PAGE_ALIGNED(uaddr)) {
> + if (unlikely(__get_user(c, uaddr) != 0))
> + return size;
> + uaddr = (const char __user *)PAGE_ALIGN((unsigned long)uaddr);
> + }
> + end = (const char __user *)PAGE_ALIGN((unsigned long)start + size);
> + if (unlikely(end < start))
> + end = NULL;
> + while (uaddr != end) {

Same kind of issue here, when 'end' was set to NULL?

Thanks.

> + if (unlikely(__get_user(c, uaddr) != 0))
> + goto out;
> + uaddr += PAGE_SIZE;
> + }
> +
> +out:
> + (void)c;
> + if (size > uaddr - start)
> + return size - (uaddr - start);
> + return 0;
> +}
> +EXPORT_SYMBOL(fault_in_readable);
> +
> /**
> * get_dump_page() - pin user page in memory while writing it to core dump
> * @addr: user address
> --
> 2.26.3
>


--
Filipe David Manana,

“Whether you think you can, or you think you can't — you're right.”