Re: [PATCH v3 1/4] kexec: simplify compat_sys_kexec_load

From: Eric W. Biederman
Date: Tue May 18 2021 - 18:45:44 EST



Arnd Bergmann <arnd@xxxxxxxxxx> writes:

> On Tue, May 18, 2021 at 4:05 PM Arnd Bergmann <arnd@xxxxxxxxxx> wrote:
>>
>> On Tue, May 18, 2021 at 3:41 PM Eric W. Biederman <ebiederm@xxxxxxxxxxxx> wrote:
>> >
>> > Arnd Bergmann <arnd@xxxxxxxxxx> writes:
>> >
>> > > From: Arnd Bergmann <arnd@xxxxxxxx>KEXEC_ARCH_DEFAULT
>> > >
>> > > The compat version of sys_kexec_load() uses compat_alloc_user_space to
>> > > convert the user-provided arguments into the native format.
>> > >
>> > > Move the conversion into the regular implementation with
>> > > an in_compat_syscall() check to simplify it and avoid the
>> > > compat_alloc_user_space() call.
>> > >
>> > > compat_sys_kexec_load() now behaves the same as sys_kexec_load().
>> >
>> > Nacked-by: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx>
>> >KEXEC_ARCH_DEFAULT
>> > The patch is wrong.
>> >
>> > The logic between the compat entry point and the ordinary entry point
>> > are by necessity different. This unifies the logic and breaks the compat
>> > entry point.
>> >
>> > The fundamentally necessity is that the code being loaded needs to know
>> > which mode the kernel is running in so it can safely transition to the
>> > new kernel.
>> >
>> > Given that the two entry points fundamentally need different logic,
>> > and that difference was not preserved and the goal of this patchset
>> > was to unify that which fundamentally needs to be different. I don't
>> > think this patch series makes any sense for kexec.
>>
>> Sorry, I'm not following that explanation. Can you clarify what different
>> modes of the kernel you are referring to here, and how my patch
>> changes this?

I think something like the untested diff below is enough to get rid of
compat_alloc_user cleanly.

Certainly it should be enough to give any idea what I am thinking.

diff --git a/kernel/kexec.c b/kernel/kexec.c
index c82c6c06f051..ce69a5d68023 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -19,26 +19,21 @@

#include "kexec_internal.h"

-static int copy_user_segment_list(struct kimage *image,
+static void copy_user_segment_list(struct kimage *image,
unsigned long nr_segments,
- struct kexec_segment __user *segments)
+ struct kexec_segment *segments)
{
- int ret;
size_t segment_bytes;

/* Read in the segments */
image->nr_segments = nr_segments;
segment_bytes = nr_segments * sizeof(*segments);
- ret = copy_from_user(image->segment, segments, segment_bytes);
- if (ret)
- ret = -EFAULT;
-
- return ret;
+ memcpy(image->segment, segments, segment_bytes);
}

static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
unsigned long nr_segments,
- struct kexec_segment __user *segments,
+ struct kexec_segment *segments,
unsigned long flags)
{
int ret;
@@ -59,9 +54,7 @@ static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,

image->start = entry;

- ret = copy_user_segment_list(image, nr_segments, segments);
- if (ret)
- goto out_free_image;
+ copy_user_segment_list(image, nr_segments, segments);

if (kexec_on_panic) {
/* Enable special crash kernel control page alloc policy. */
@@ -103,8 +96,8 @@ static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
return ret;
}

-static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
- struct kexec_segment __user *segments, unsigned long flags)
+static int do_kexec_load_locked(unsigned long entry, unsigned long nr_segments,
+ struct kexec_segment *segments, unsigned long flags)
{
struct kimage **dest_image, *image;
unsigned long i;
@@ -174,6 +167,27 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
return ret;
}

+static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
+ struct kexec_segment *segments, unsigned long flags)
+{
+ int result;
+
+ /* Because we write directly to the reserved memory
+ * region when loading crash kernels we need a mutex here to
+ * prevent multiple crash kernels from attempting to load
+ * simultaneously, and to prevent a crash kernel from loading
+ * over the top of a in use crash kernel.
+ *
+ * KISS: always take the mutex.
+ */
+ if (!mutex_trylock(&kexec_mutex))
+ return -EBUSY;
+
+ result = do_kexec_load_locked(entry, nr_segments, segments, flags);
+ mutex_unlock(&kexec_mutex);
+ return result;
+}
+
/*
* Exec Kernel system call: for obvious reasons only root may call it.
*
@@ -224,6 +238,11 @@ static inline int kexec_load_check(unsigned long nr_segments,
if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
return -EINVAL;

+ /* Verify we are on the appropriate architecture */
+ if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
+ ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
+ return -EINVAL;
+
/* Put an artificial cap on the number
* of segments passed to kexec_load.
*/
@@ -236,33 +255,29 @@ static inline int kexec_load_check(unsigned long nr_segments,
SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
struct kexec_segment __user *, segments, unsigned long, flags)
{
- int result;
+ struct kexec_segment *ksegments;
+ unsigned long bytes, result;

result = kexec_load_check(nr_segments, flags);
if (result)
return result;

- /* Verify we are on the appropriate architecture */
- if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
- ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
- return -EINVAL;
-
- /* Because we write directly to the reserved memory
- * region when loading crash kernels we need a mutex here to
- * prevent multiple crash kernels from attempting to load
- * simultaneously, and to prevent a crash kernel from loading
- * over the top of a in use crash kernel.
- *
- * KISS: always take the mutex.
- */
- if (!mutex_trylock(&kexec_mutex))
- return -EBUSY;
+ bytes = nr_segments * sizeof(ksegments[0]);
+ ksegments = kmalloc(bytes, GFP_KERNEL);
+ if (!ksegments)
+ return -ENOMEM;

+ result = copy_from_user(ksegments, segments, bytes);
+ if (result)
+ goto fail;
+
result = do_kexec_load(entry, nr_segments, segments, flags);
+ kfree(ksegments);

- mutex_unlock(&kexec_mutex);
-
+fail:
+ kfree(ksegments);
return result;
+
}

#ifdef CONFIG_COMPAT
@@ -272,9 +287,9 @@ COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
compat_ulong_t, flags)
{
struct compat_kexec_segment in;
- struct kexec_segment out, __user *ksegments;
- unsigned long i, result;
-
+ struct kexec_segment *ksegments;
+ unsigned long bytes, i, result;
+
result = kexec_load_check(nr_segments, flags);
if (result)
return result;
@@ -285,37 +300,26 @@ COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
return -EINVAL;

- ksegments = compat_alloc_user_space(nr_segments * sizeof(out));
+ bytes = nr_segments * sizeof(ksegments[0]);
+ ksegments = kmalloc(bytes, GFP_KERNEL);
+ if (!ksegments)
+ return -ENOMEM;
+
for (i = 0; i < nr_segments; i++) {
result = copy_from_user(&in, &segments[i], sizeof(in));
if (result)
- return -EFAULT;
-
- out.buf = compat_ptr(in.buf);
- out.bufsz = in.bufsz;
- out.mem = in.mem;
- out.memsz = in.memsz;
+ goto fail;

- result = copy_to_user(&ksegments[i], &out, sizeof(out));
- if (result)
- return -EFAULT;
+ ksegments[i].buf = compat_ptr(in.buf);
+ ksegments[i].bufsz = in.bufsz;
+ ksegments[i].mem = in.mem;
+ ksegments[i].memsz = in.memsz;
}

- /* Because we write directly to the reserved memory
- * region when loading crash kernels we need a mutex here to
- * prevent multiple crash kernels from attempting to load
- * simultaneously, and to prevent a crash kernel from loading
- * over the top of a in use crash kernel.
- *
- * KISS: always take the mutex.
- */
- if (!mutex_trylock(&kexec_mutex))
- return -EBUSY;
-
result = do_kexec_load(entry, nr_segments, ksegments, flags);

- mutex_unlock(&kexec_mutex);
-
+fail:
+ kfree(ksegments);
return result;
}
#endif