[PATCH v2 1/1] userfaultfd/sysctl: add vm.unprivileged_userfaultfd

From: Peter Xu
Date: Mon Mar 18 2019 - 23:07:49 EST


Add a global sysctl knob "vm.unprivileged_userfaultfd" to control
whether userfaultfd is allowed by unprivileged users. When this is
set to zero, only privileged users (root user, or users with the
CAP_SYS_PTRACE capability) will be able to use the userfaultfd
syscalls.

Suggested-by: Andrea Arcangeli <aarcange@xxxxxxxxxx>
Suggested-by: Mike Rapoport <rppt@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Peter Xu <peterx@xxxxxxxxxx>
---
Documentation/sysctl/vm.txt | 12 ++++++++++++
fs/userfaultfd.c | 5 +++++
include/linux/userfaultfd_k.h | 2 ++
kernel/sysctl.c | 12 ++++++++++++
4 files changed, 31 insertions(+)

diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 187ce4f599a2..f146712f67bb 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -61,6 +61,7 @@ Currently, these files are in /proc/sys/vm:
- stat_refresh
- numa_stat
- swappiness
+- unprivileged_userfaultfd
- user_reserve_kbytes
- vfs_cache_pressure
- watermark_boost_factor
@@ -818,6 +819,17 @@ The default value is 60.

==============================================================

+unprivileged_userfaultfd
+
+This flag controls whether unprivileged users can use the userfaultfd
+syscalls. Set this to 1 to allow unprivileged users to use the
+userfaultfd syscalls, or set this to 0 to restrict userfaultfd to only
+privileged users (with SYS_CAP_PTRACE capability).
+
+The default value is 1.
+
+==============================================================
+
- user_reserve_kbytes

When overcommit_memory is set to 2, "never overcommit" mode, reserve
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 89800fc7dc9d..7e856a25cc2f 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -30,6 +30,8 @@
#include <linux/security.h>
#include <linux/hugetlb.h>

+int sysctl_unprivileged_userfaultfd __read_mostly = 1;
+
static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly;

enum userfaultfd_state {
@@ -1921,6 +1923,9 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
struct userfaultfd_ctx *ctx;
int fd;

+ if (!sysctl_unprivileged_userfaultfd && !capable(CAP_SYS_PTRACE))
+ return -EPERM;
+
BUG_ON(!current->mm);

/* Check the UFFD_* constants for consistency. */
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 37c9eba75c98..ac9d71e24b81 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -28,6 +28,8 @@
#define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
#define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)

+extern int sysctl_unprivileged_userfaultfd;
+
extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);

extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7578e21a711b..9b8ff1881df9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -66,6 +66,7 @@
#include <linux/kexec.h>
#include <linux/bpf.h>
#include <linux/mount.h>
+#include <linux/userfaultfd_k.h>

#include <linux/uaccess.h>
#include <asm/processor.h>
@@ -1704,6 +1705,17 @@ static struct ctl_table vm_table[] = {
.extra1 = (void *)&mmap_rnd_compat_bits_min,
.extra2 = (void *)&mmap_rnd_compat_bits_max,
},
+#endif
+#ifdef CONFIG_USERFAULTFD
+ {
+ .procname = "unprivileged_userfaultfd",
+ .data = &sysctl_unprivileged_userfaultfd,
+ .maxlen = sizeof(sysctl_unprivileged_userfaultfd),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
#endif
{ }
};
--
2.17.1