[PATCH v4 1/4] kexec: avoid compat_alloc_user_space

From: Arnd Bergmann
Date: Tue Jul 20 2021 - 11:33:59 EST


From: Arnd Bergmann <arnd@xxxxxxxx>

The compat version of sys_kexec_load() uses compat_alloc_user_space()
to convert the user-provided arguments into the native format. It is
one of the last system calls that still does this; almost all others
have already been converted to a different approach.
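
For reference, compat_alloc_user_space() carves a temporary buffer out
of the user stack so that a compat handler can rewrite the 32-bit
arguments into the native layout and then pass a __user pointer to the
regular implementation. A minimal sketch of that pattern, using made-up
struct and function names rather than the actual kexec code:

        static long compat_sys_foo(struct compat_foo_args __user *uargs)
        {
                struct compat_foo_args cargs;   /* 32-bit layout */
                struct foo_args __user *nargs;  /* native layout */

                if (copy_from_user(&cargs, uargs, sizeof(cargs)))
                        return -EFAULT;

                /* scratch space carved out of the user stack */
                nargs = compat_alloc_user_space(sizeof(*nargs));
                if (put_user(compat_ptr(cargs.buf), &nargs->buf) ||
                    put_user(cargs.bufsz, &nargs->bufsz))
                        return -EFAULT;

                /* native code re-reads the converted args from user space */
                return do_sys_foo(nargs);
        }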

Change do_kexec_load() to take a kernel pointer instead, splitting the
mutex-protected part out into do_kexec_load_locked(), and do the copy
(and, in the compat case, the conversion) of the segment list in the
two entry points for native and compat mode.
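
The resulting structure, condensed from the patch below: both entry
points build a kernel-space array of struct kexec_segment, so the
shared code never goes through a __user pointer for the segment list.
For the compat entry point this looks roughly like (error paths
trimmed):

        ksegments = kmalloc(nr_segments * sizeof(*ksegments), GFP_KERNEL);
        for (i = 0; i < nr_segments; i++) {
                struct compat_kexec_segment in;

                /* read one 32-bit segment, then widen it in place */
                if (copy_from_user(&in, &segments[i], sizeof(in)))
                        goto fail;      /* 'fail' frees ksegments */
                ksegments[i].buf   = compat_ptr(in.buf);
                ksegments[i].bufsz = in.bufsz;
                ksegments[i].mem   = in.mem;
                ksegments[i].memsz = in.memsz;
        }
        result = do_kexec_load(entry, nr_segments, ksegments, flags);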

This approach was suggested by Eric Biederman, who posted the initial
version as an alternative to a different patch from Arnd.

Link: https://lore.kernel.org/lkml/m1y2cbzmnw.fsf@xxxxxxxxxxxxxxxxx/
Signed-off-by: Arnd Bergmann <arnd@xxxxxxxx>
---
kernel/kexec.c | 120 +++++++++++++++++++++++++------------------------
1 file changed, 61 insertions(+), 59 deletions(-)

diff --git a/kernel/kexec.c b/kernel/kexec.c
index c82c6c06f051..4eae5f2aa159 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -19,26 +19,21 @@

#include "kexec_internal.h"

-static int copy_user_segment_list(struct kimage *image,
+static void copy_user_segment_list(struct kimage *image,
unsigned long nr_segments,
- struct kexec_segment __user *segments)
+ struct kexec_segment *segments)
{
- int ret;
size_t segment_bytes;

/* Read in the segments */
image->nr_segments = nr_segments;
segment_bytes = nr_segments * sizeof(*segments);
- ret = copy_from_user(image->segment, segments, segment_bytes);
- if (ret)
- ret = -EFAULT;
-
- return ret;
+ memcpy(image->segment, segments, segment_bytes);
}

static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
unsigned long nr_segments,
- struct kexec_segment __user *segments,
+ struct kexec_segment *segments,
unsigned long flags)
{
int ret;
@@ -59,9 +54,7 @@ static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,

image->start = entry;

- ret = copy_user_segment_list(image, nr_segments, segments);
- if (ret)
- goto out_free_image;
+ copy_user_segment_list(image, nr_segments, segments);

if (kexec_on_panic) {
/* Enable special crash kernel control page alloc policy. */
@@ -103,8 +96,8 @@ static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
return ret;
}

-static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
- struct kexec_segment __user *segments, unsigned long flags)
+static int do_kexec_load_locked(unsigned long entry, unsigned long nr_segments,
+ struct kexec_segment *segments, unsigned long flags)
{
struct kimage **dest_image, *image;
unsigned long i;
@@ -174,6 +167,27 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
return ret;
}

+static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
+ struct kexec_segment *segments, unsigned long flags)
+{
+ int result;
+
+ /* Because we write directly to the reserved memory
+ * region when loading crash kernels we need a mutex here to
+ * prevent multiple crash kernels from attempting to load
+ * simultaneously, and to prevent a crash kernel from loading
+ * over the top of an in-use crash kernel.
+ *
+ * KISS: always take the mutex.
+ */
+ if (!mutex_trylock(&kexec_mutex))
+ return -EBUSY;
+
+ result = do_kexec_load_locked(entry, nr_segments, segments, flags);
+ mutex_unlock(&kexec_mutex);
+ return result;
+}
+
/*
* Exec Kernel system call: for obvious reasons only root may call it.
*
@@ -224,6 +238,11 @@ static inline int kexec_load_check(unsigned long nr_segments,
if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
return -EINVAL;

+ /* Verify we are on the appropriate architecture */
+ if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
+ ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
+ return -EINVAL;
+
/* Put an artificial cap on the number
* of segments passed to kexec_load.
*/
@@ -236,32 +255,26 @@ static inline int kexec_load_check(unsigned long nr_segments,
SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
struct kexec_segment __user *, segments, unsigned long, flags)
{
- int result;
+ struct kexec_segment *ksegments;
+ unsigned long bytes, result;

result = kexec_load_check(nr_segments, flags);
if (result)
return result;

- /* Verify we are on the appropriate architecture */
- if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
- ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
- return -EINVAL;
-
- /* Because we write directly to the reserved memory
- * region when loading crash kernels we need a mutex here to
- * prevent multiple crash kernels from attempting to load
- * simultaneously, and to prevent a crash kernel from loading
- * over the top of a in use crash kernel.
- *
- * KISS: always take the mutex.
- */
- if (!mutex_trylock(&kexec_mutex))
- return -EBUSY;
+ bytes = nr_segments * sizeof(ksegments[0]);
+ ksegments = kmalloc(bytes, GFP_KERNEL);
+ if (!ksegments)
+ return -ENOMEM;

- result = do_kexec_load(entry, nr_segments, segments, flags);
+ result = -EFAULT;
+ if (copy_from_user(ksegments, segments, bytes))
+ goto fail;

- mutex_unlock(&kexec_mutex);
+ result = do_kexec_load(entry, nr_segments, ksegments, flags);

+fail:
+ kfree(ksegments);
return result;
}

@@ -272,8 +285,8 @@ COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
compat_ulong_t, flags)
{
struct compat_kexec_segment in;
- struct kexec_segment out, __user *ksegments;
- unsigned long i, result;
+ struct kexec_segment *ksegments;
+ unsigned long bytes, i, result;

result = kexec_load_check(nr_segments, flags);
if (result)
@@ -285,37 +298,26 @@ COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
return -EINVAL;

- ksegments = compat_alloc_user_space(nr_segments * sizeof(out));
+ bytes = nr_segments * sizeof(ksegments[0]);
+ ksegments = kmalloc(bytes, GFP_KERNEL);
+ if (!ksegments)
+ return -ENOMEM;
+
for (i = 0; i < nr_segments; i++) {
- result = copy_from_user(&in, &segments[i], sizeof(in));
- if (result)
- return -EFAULT;
-
- out.buf = compat_ptr(in.buf);
- out.bufsz = in.bufsz;
- out.mem = in.mem;
- out.memsz = in.memsz;
+ result = -EFAULT;
+ if (copy_from_user(&in, &segments[i], sizeof(in)))
+ goto fail;

- result = copy_to_user(&ksegments[i], &out, sizeof(out));
- if (result)
- return -EFAULT;
+ ksegments[i].buf = compat_ptr(in.buf);
+ ksegments[i].bufsz = in.bufsz;
+ ksegments[i].mem = in.mem;
+ ksegments[i].memsz = in.memsz;
}

- /* Because we write directly to the reserved memory
- * region when loading crash kernels we need a mutex here to
- * prevent multiple crash kernels from attempting to load
- * simultaneously, and to prevent a crash kernel from loading
- * over the top of a in use crash kernel.
- *
- * KISS: always take the mutex.
- */
- if (!mutex_trylock(&kexec_mutex))
- return -EBUSY;
-
result = do_kexec_load(entry, nr_segments, ksegments, flags);

- mutex_unlock(&kexec_mutex);
-
+fail:
+ kfree(ksegments);
return result;
}
#endif
--
2.29.2