Re: [PATCH AUTOSEL 4.19 135/191] efi/x86: Handle page faults occurring while running EFI runtime services
From: Ard Biesheuvel
Date: Sun Nov 10 2019 - 02:40:54 EST
On Sun, 10 Nov 2019 at 03:44, Sasha Levin <sashal@xxxxxxxxxx> wrote:
>
> From: Sai Praneeth <sai.praneeth.prakhya@xxxxxxxxx>
>
> [ Upstream commit 3425d934fc0312f62024163736a7afe4de20c10f ]
>
> Memory accesses performed by UEFI runtime services should be limited to:
> - reading/executing from EFI_RUNTIME_SERVICES_CODE memory regions
> - reading/writing from/to EFI_RUNTIME_SERVICES_DATA memory regions
> - reading/writing by-ref arguments
> - reading/writing from/to the stack.
>
> Accesses outside these regions may cause the kernel to hang because the
> memory region requested by the firmware isn't mapped in efi_pgd, which
> causes a page fault in ring 0 and the kernel fails to handle it, leading
> to die(). To save kernel from hanging, add an EFI specific page fault
> handler which recovers from such faults by
> 1. If the efi runtime service is efi_reset_system(), reboot the machine
> through BIOS.
> 2. If the efi runtime service is _not_ efi_reset_system(), then freeze
> efi_rts_wq and schedule a new process.
>
> The EFI page fault handler offers us two advantages:
> 1. Avoid potential hangs caused by buggy firmware.
> 2. Shout loud that the firmware is buggy and hence is not a kernel bug.
>
> Tested-by: Bhupesh Sharma <bhsharma@xxxxxxxxxx>
> Suggested-by: Matt Fleming <matt@xxxxxxxxxxxxxxxxxxx>
> Based-on-code-from: Ricardo Neri <ricardo.neri@xxxxxxxxx>
> Signed-off-by: Sai Praneeth Prakhya <sai.praneeth.prakhya@xxxxxxxxx>
> Reviewed-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> [ardb: clarify commit log]
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@xxxxxxxxxx>
> Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>
NAK
This backports a patch to -stable that is *known* to be broken, given
that the same series backports a patch that fixes it.
> ---
> arch/x86/include/asm/efi.h | 1 +
> arch/x86/mm/fault.c | 9 +++
> arch/x86/platform/efi/quirks.c | 78 +++++++++++++++++++++++++
> drivers/firmware/efi/runtime-wrappers.c | 8 +++
> include/linux/efi.h | 8 ++-
> 5 files changed, 103 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
> index baa549f8e9188..45864898f7e50 100644
> --- a/arch/x86/include/asm/efi.h
> +++ b/arch/x86/include/asm/efi.h
> @@ -138,6 +138,7 @@ extern void __init efi_apply_memmap_quirks(void);
> extern int __init efi_reuse_config(u64 tables, int nr_tables);
> extern void efi_delete_dummy_variable(void);
> extern void efi_switch_mm(struct mm_struct *mm);
> +extern void efi_recover_from_page_fault(unsigned long phys_addr);
>
> struct efi_setup_data {
> u64 fw_vendor;
> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
> index 1bcb7242ad79a..53e69c7c5cd18 100644
> --- a/arch/x86/mm/fault.c
> +++ b/arch/x86/mm/fault.c
> @@ -16,6 +16,7 @@
> #include <linux/prefetch.h> /* prefetchw */
> #include <linux/context_tracking.h> /* exception_enter(), ... */
> #include <linux/uaccess.h> /* faulthandler_disabled() */
> +#include <linux/efi.h> /* efi_recover_from_page_fault()*/
> #include <linux/mm_types.h>
>
> #include <asm/cpufeature.h> /* boot_cpu_has, ... */
> @@ -25,6 +26,7 @@
> #include <asm/vsyscall.h> /* emulate_vsyscall */
> #include <asm/vm86.h> /* struct vm86 */
> #include <asm/mmu_context.h> /* vma_pkey() */
> +#include <asm/efi.h> /* efi_recover_from_page_fault()*/
>
> #define CREATE_TRACE_POINTS
> #include <asm/trace/exceptions.h>
> @@ -783,6 +785,13 @@ no_context(struct pt_regs *regs, unsigned long error_code,
> if (is_errata93(regs, address))
> return;
>
> + /*
> + * Buggy firmware could access regions which might page fault, try to
> + * recover from such faults.
> + */
> + if (IS_ENABLED(CONFIG_EFI))
> + efi_recover_from_page_fault(address);
> +
> /*
> * Oops. The kernel tried to access some bad page. We'll have to
> * terminate things with extreme prejudice:
> diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
> index 844d31cb8a0c7..669babcaf245a 100644
> --- a/arch/x86/platform/efi/quirks.c
> +++ b/arch/x86/platform/efi/quirks.c
> @@ -16,6 +16,7 @@
> #include <asm/efi.h>
> #include <asm/uv/uv.h>
> #include <asm/cpu_device_id.h>
> +#include <asm/reboot.h>
>
> #define EFI_MIN_RESERVE 5120
>
> @@ -654,3 +655,80 @@ int efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff,
> }
>
> #endif
> +
> +/*
> + * If any access by any efi runtime service causes a page fault, then,
> + * 1. If it's efi_reset_system(), reboot through BIOS.
> + * 2. If any other efi runtime service, then
> + * a. Return error status to the efi caller process.
> + * b. Disable EFI Runtime Services forever and
> + * c. Freeze efi_rts_wq and schedule new process.
> + *
> + * @return: Returns, if the page fault is not handled. This function
> + * will never return if the page fault is handled successfully.
> + */
> +void efi_recover_from_page_fault(unsigned long phys_addr)
> +{
> + if (!IS_ENABLED(CONFIG_X86_64))
> + return;
> +
> + /*
> + * Make sure that an efi runtime service caused the page fault.
> + * "efi_mm" cannot be used to check if the page fault had occurred
> + * in the firmware context because efi=old_map doesn't use efi_pgd.
> + */
> + if (efi_rts_work.efi_rts_id == NONE)
> + return;
> +
> + /*
> + * Address range 0x0000 - 0x0fff is always mapped in the efi_pgd, so
> + * page faulting on these addresses isn't expected.
> + */
> + if (phys_addr >= 0x0000 && phys_addr <= 0x0fff)
> + return;
> +
> + /*
> + * Print stack trace as it might be useful to know which EFI Runtime
> + * Service is buggy.
> + */
> + WARN(1, FW_BUG "Page fault caused by firmware at PA: 0x%lx\n",
> + phys_addr);
> +
> + /*
> + * Buggy efi_reset_system() is handled differently from other EFI
> + * Runtime Services as it doesn't use efi_rts_wq. Although,
> + * native_machine_emergency_restart() says that machine_real_restart()
> + * could fail, it's better not to compilcate this fault handler
> + * because this case occurs *very* rarely and hence could be improved
> + * on a need by basis.
> + */
> + if (efi_rts_work.efi_rts_id == RESET_SYSTEM) {
> + pr_info("efi_reset_system() buggy! Reboot through BIOS\n");
> + machine_real_restart(MRR_BIOS);
> + return;
> + }
> +
> + /*
> + * Before calling EFI Runtime Service, the kernel has switched the
> + * calling process to efi_mm. Hence, switch back to task_mm.
> + */
> + arch_efi_call_virt_teardown();
> +
> + /* Signal error status to the efi caller process */
> + efi_rts_work.status = EFI_ABORTED;
> + complete(&efi_rts_work.efi_rts_comp);
> +
> + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
> + pr_info("Froze efi_rts_wq and disabled EFI Runtime Services\n");
> +
> + /*
> + * Call schedule() in an infinite loop, so that any spurious wake ups
> + * will never run efi_rts_wq again.
> + */
> + for (;;) {
> + set_current_state(TASK_IDLE);
> + schedule();
> + }
> +
> + return;
> +}
> diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
> index b31e3d3729a6d..e2abfdb5cee6a 100644
> --- a/drivers/firmware/efi/runtime-wrappers.c
> +++ b/drivers/firmware/efi/runtime-wrappers.c
> @@ -61,6 +61,11 @@ struct efi_runtime_work efi_rts_work;
> ({ \
> efi_rts_work.status = EFI_ABORTED; \
> \
> + if (!efi_enabled(EFI_RUNTIME_SERVICES)) { \
> + pr_warn_once("EFI Runtime Services are disabled!\n"); \
> + goto exit; \
> + } \
> + \
> init_completion(&efi_rts_work.efi_rts_comp); \
> INIT_WORK(&efi_rts_work.work, efi_call_rts); \
> efi_rts_work.arg1 = _arg1; \
> @@ -79,6 +84,8 @@ struct efi_runtime_work efi_rts_work;
> else \
> pr_err("Failed to queue work to efi_rts_wq.\n"); \
> \
> +exit: \
> + efi_rts_work.efi_rts_id = NONE; \
> efi_rts_work.status; \
> })
>
> @@ -400,6 +407,7 @@ static void virt_efi_reset_system(int reset_type,
> "could not get exclusive access to the firmware\n");
> return;
> }
> + efi_rts_work.efi_rts_id = RESET_SYSTEM;
> __efi_call_virt(reset_system, reset_type, status, data_size, data);
> up(&efi_runtime_lock);
> }
> diff --git a/include/linux/efi.h b/include/linux/efi.h
> index 9d4c25090fd04..9a86f467b8911 100644
> --- a/include/linux/efi.h
> +++ b/include/linux/efi.h
> @@ -1666,8 +1666,13 @@ struct linux_efi_tpm_eventlog {
>
> extern int efi_tpm_eventlog_init(void);
>
> -/* efi_runtime_service() function identifiers */
> +/*
> + * efi_runtime_service() function identifiers.
> + * "NONE" is used by efi_recover_from_page_fault() to check if the page
> + * fault happened while executing an efi runtime service.
> + */
> enum efi_rts_ids {
> + NONE,
> GET_TIME,
> SET_TIME,
> GET_WAKEUP_TIME,
> @@ -1677,6 +1682,7 @@ enum efi_rts_ids {
> SET_VARIABLE,
> QUERY_VARIABLE_INFO,
> GET_NEXT_HIGH_MONO_COUNT,
> + RESET_SYSTEM,
> UPDATE_CAPSULE,
> QUERY_CAPSULE_CAPS,
> };
> --
> 2.20.1
>