Re: [PATCHv11 11/19] x86/tdx: Convert shared memory back to private on kexec

From: Kirill A. Shutemov
Date: Wed Jun 05 2024 - 09:19:21 EST


On Tue, Jun 04, 2024 at 09:27:59AM -0700, Dave Hansen wrote:
> On 5/28/24 02:55, Kirill A. Shutemov wrote:
> > +/* Stop new private<->shared conversions */
> > +static void tdx_kexec_begin(bool crash)
> > +{
> > + /*
> > + * Crash kernel reaches here with interrupts disabled: can't wait for
> > + * conversions to finish.
> > + *
> > + * If race happened, just report and proceed.
> > + */
> > + if (!set_memory_enc_stop_conversion(!crash))
> > + pr_warn("Failed to stop shared<->private conversions\n");
> > +}
>
> I don't like having to pass 'crash' in here.
>
> If interrupts are the problem we have ways of testing for those directly.
>
> If it's being in an oops that's a problem, we have 'oops_in_progress'
> for that.
>
> In other words, I'd much rather this function (or better yet
> set_memory_enc_stop_conversion() itself) use some existing API to change
> its behavior in a crash rather than have the context be passed down and
> twiddled through several levels of function calls.
>
> There are a ton of these in the console code:
>
> if (oops_in_progress)
> foo_trylock();
> else
> foo_lock();
>
> To me, that's a billion times more clear than a 'wait' argument that
> gets derives from who-knows-what that I have to trace through ten levels
> of function calls.

Okay fair enough. Check out the fixup below. Is it what you mean?

One other thing I realized is that these callback are dead code if kernel
compiled without kexec support. Do we want them to be wrapped with
#ifdef COFNIG_KEXEC_CORE everywhere? It is going to be ugly.

Any better ideas?

diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 3d23ea0f5d45..1c5aa036b76b 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -834,7 +834,7 @@ static int tdx_enc_status_change_finish(unsigned long vaddr, int numpages,
}

/* Stop new private<->shared conversions */
-static void tdx_kexec_begin(bool crash)
+static void tdx_kexec_begin(void)
{
/*
* Crash kernel reaches here with interrupts disabled: can't wait for
@@ -842,7 +842,7 @@ static void tdx_kexec_begin(bool crash)
*
* If race happened, just report and proceed.
*/
- if (!set_memory_enc_stop_conversion(!crash))
+ if (!set_memory_enc_stop_conversion())
pr_warn("Failed to stop shared<->private conversions\n");
}

diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index d490db38db9e..4b2abce2e3e7 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -50,7 +50,7 @@ int set_memory_np(unsigned long addr, int numpages);
int set_memory_p(unsigned long addr, int numpages);
int set_memory_4k(unsigned long addr, int numpages);

-bool set_memory_enc_stop_conversion(bool wait);
+bool set_memory_enc_stop_conversion(void);
int set_memory_encrypted(unsigned long addr, int numpages);
int set_memory_decrypted(unsigned long addr, int numpages);

diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index b0f313278967..213cf5379a5a 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -152,8 +152,6 @@ struct x86_init_acpi {
* @enc_kexec_begin Begin the two-step process of converting shared memory back
* to private. It stops the new conversions from being started
* and waits in-flight conversions to finish, if possible.
- * The @crash parameter denotes whether the function is being
- * called in the crash shutdown path.
* @enc_kexec_finish Finish the two-step process of converting shared memory to
* private. All memory is private after the call when
* the function returns.
@@ -165,7 +163,7 @@ struct x86_guest {
int (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc);
bool (*enc_tlb_flush_required)(bool enc);
bool (*enc_cache_flush_required)(void);
- void (*enc_kexec_begin)(bool crash);
+ void (*enc_kexec_begin)(void);
void (*enc_kexec_finish)(void);
};

diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index fc52ea80cdc8..340af8155658 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -137,7 +137,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
* down and interrupts have been disabled. This allows the callback to
* detect a race with the conversion and report it.
*/
- x86_platform.guest.enc_kexec_begin(true);
+ x86_platform.guest.enc_kexec_begin();
x86_platform.guest.enc_kexec_finish();

crash_save_cpu(regs, safe_smp_processor_id());
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 513809b5b27c..0e0a4cf6b5eb 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -723,7 +723,7 @@ void native_machine_shutdown(void)
* conversions to finish cleanly.
*/
if (kexec_in_progress)
- x86_platform.guest.enc_kexec_begin(false);
+ x86_platform.guest.enc_kexec_begin();

/* Stop the cpus and apics */
#ifdef CONFIG_X86_IO_APIC
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 8a79fb505303..82b128d3f309 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -138,7 +138,7 @@ static int enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool
static int enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return 0; }
static bool enc_tlb_flush_required_noop(bool enc) { return false; }
static bool enc_cache_flush_required_noop(void) { return false; }
-static void enc_kexec_begin_noop(bool crash) {}
+static void enc_kexec_begin_noop(void) {}
static void enc_kexec_finish_noop(void) {}
static bool is_private_mmio_noop(u64 addr) {return false; }

diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 2a548b65ef5f..443a97e515c0 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -2240,13 +2240,14 @@ static DECLARE_RWSEM(mem_enc_lock);
*
* Taking the exclusive mem_enc_lock waits for in-flight conversions to complete.
* The lock is not released to prevent new conversions from being started.
- *
- * If sleep is not allowed, as in a crash scenario, try to take the lock.
- * Failure indicates that there is a race with the conversion.
*/
-bool set_memory_enc_stop_conversion(bool wait)
+bool set_memory_enc_stop_conversion(void)
{
- if (!wait)
+ /*
+ * In a crash scenario, sleep is not allowed. Try to take the lock.
+ * Failure indicates that there is a race with the conversion.
+ */
+ if (oops_in_progress)
return down_write_trylock(&mem_enc_lock);

down_write(&mem_enc_lock);
--
Kiryl Shutsemau / Kirill A. Shutemov