Re: [PATCH 10/21] userfaultfd: add new syscall to provide memory externalization

From: Pavel Emelyanov
Date: Thu Mar 05 2015 - 12:58:33 EST



> +int handle_userfault(struct vm_area_struct *vma, unsigned long address,
> + unsigned int flags, unsigned long reason)
> +{
> + struct mm_struct *mm = vma->vm_mm;
> + struct userfaultfd_ctx *ctx;
> + struct userfaultfd_wait_queue uwq;
> +
> + BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
> +
> + ctx = vma->vm_userfaultfd_ctx.ctx;
> + if (!ctx)
> + return VM_FAULT_SIGBUS;
> +
> + BUG_ON(ctx->mm != mm);
> +
> + VM_BUG_ON(reason & ~(VM_UFFD_MISSING|VM_UFFD_WP));
> + VM_BUG_ON(!(reason & VM_UFFD_MISSING) ^ !!(reason & VM_UFFD_WP));
> +
> + /*
> + * If it's already released don't get it. This avoids to loop
> + * in __get_user_pages if userfaultfd_release waits on the
> + * caller of handle_userfault to release the mmap_sem.
> + */
> + if (unlikely(ACCESS_ONCE(ctx->released)))
> + return VM_FAULT_SIGBUS;
> +
> + /* check that we can return VM_FAULT_RETRY */
> + if (unlikely(!(flags & FAULT_FLAG_ALLOW_RETRY))) {
> + /*
> + * Validate the invariant that nowait must allow retry
> + * to be sure not to return SIGBUS erroneously on
> + * nowait invocations.
> + */
> + BUG_ON(flags & FAULT_FLAG_RETRY_NOWAIT);
> +#ifdef CONFIG_DEBUG_VM
> + if (printk_ratelimit()) {
> + printk(KERN_WARNING
> + "FAULT_FLAG_ALLOW_RETRY missing %x\n", flags);
> + dump_stack();
> + }
> +#endif
> + return VM_FAULT_SIGBUS;
> + }
> +
> + /*
> + * Handle nowait, not much to do other than tell it to retry
> + * and wait.
> + */
> + if (flags & FAULT_FLAG_RETRY_NOWAIT)
> + return VM_FAULT_RETRY;
> +
> + /* take the reference before dropping the mmap_sem */
> + userfaultfd_ctx_get(ctx);
> +
> + /* be gentle and immediately relinquish the mmap_sem */
> + up_read(&mm->mmap_sem);
> +
> + init_waitqueue_func_entry(&uwq.wq, userfaultfd_wake_function);
> + uwq.wq.private = current;
> + uwq.address = userfault_address(address, flags, reason);

Since we report only the virtual address of the fault, this will cause difficulties
for a task monitoring the address space of some other task. For example:

Let's assume a task creates a userfaultfd, activates it, registers several VMAs
in it and then sends the ufd descriptor to another task. If the first task later
remaps those VMAs and starts touching pages, the monitor will start receiving
fault addresses from which it will not be able to tell which exact VMA the
requests come from.

Thanks,
Pavel

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/