RE: [PATCH v13 26/35] x86/fred: FRED entry/exit and dispatch code

From: Li, Xin3
Date: Wed Dec 06 2023 - 02:46:01 EST


> > diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c
> > new file mode 100644
> > index 000000000000..215883e90f94
> > --- /dev/null
> > +++ b/arch/x86/entry/entry_fred.c
> > @@ -0,0 +1,230 @@
> > ...
> > +static noinstr void fred_intx(struct pt_regs *regs) {
> > + switch (regs->fred_ss.vector) {
> > + /* INT0 */
>
> INTO (for overflow), not INT-zero.  However...

My bad again...

> > + case X86_TRAP_OF:
> > + exc_overflow(regs);
> > + return;
> > +
> > + /* INT3 */
> > + case X86_TRAP_BP:
> > + exc_int3(regs);
> > + return;
>
> ... neither OF nor BP will ever enter fred_intx() because they're type SWEXC not
> SWINT.

Per FRED spec 5.0, section 7.3 Software Interrupts and Related Instructions:
INT n (opcode CD followed by an immediate byte): There are 256 such
software interrupt instructions, one for each value n of the immediate
byte (0–255).

And appendix B Event Stack Levels:
If the event is an execution of INT n (opcode CD n for 8-bit value n),
the event stack level is 0. The event type is 4 (software interrupt)
and the vector is n.

So "int $0x4" and "int $0x3" (the latter emitted with asm(".byte 0xCD, 0x03")) do get here.

But into (0xCE) and int3 (0xCC) do use event type SWEXC.

BTW, into is NOT allowed in 64-bit mode but "int $0x4" is allowed.

>
> SWINT is strictly the INT $imm8 instruction.
>
> > ...
> > +static noinstr void fred_extint(struct pt_regs *regs) {
> > + unsigned int vector = regs->fred_ss.vector;
> > +
> > + if (WARN_ON_ONCE(vector < FIRST_EXTERNAL_VECTOR))
> > + return;
> > +
> > + if (likely(vector >= FIRST_SYSTEM_VECTOR)) {
> > + irqentry_state_t state = irqentry_enter(regs);
> > +
> > + instrumentation_begin();
> > + sysvec_table[vector - FIRST_SYSTEM_VECTOR](regs);
>
> array_index_mask_nospec()
>
> This is easy for an attacker to abuse, to install non-function-pointer targets into
> the indirect predictor.

HPA did use array_index_nospec() at the beginning, but I forgot it later.

>
> > + instrumentation_end();
> > + irqentry_exit(regs, state);
> > + } else {
> > + common_interrupt(regs, vector);
> > + }
> > +}
> > +
> > +static noinstr void fred_exception(struct pt_regs *regs, unsigned long error_code) {
> > + /* Optimize for #PF. That's the only exception which matters performance wise */
> > + if (likely(regs->fred_ss.vector == X86_TRAP_PF)) {
> > + exc_page_fault(regs, error_code);
> > + return;
> > + }
> > +
> > + switch (regs->fred_ss.vector) {
> > + case X86_TRAP_DE: return exc_divide_error(regs);
> > + case X86_TRAP_DB: return fred_exc_debug(regs);
> > + case X86_TRAP_BP: return exc_int3(regs);
> > + case X86_TRAP_OF: return exc_overflow(regs);
>
> Depending on what you want to do with BP/OF vs fred_intx(), this may need
> adjusting.
>
> If you are cross-checking type and vector, then these should be rejected for not
> being of type HWEXC.

You're right, the event type needs to be SWEXC for into and int3.

However, would that be overkill, assuming the hardware and VMM are sane?

>
> > + case X86_TRAP_BR: return exc_bounds(regs);
> > + case X86_TRAP_UD: return exc_invalid_op(regs);
> > + case X86_TRAP_NM: return exc_device_not_available(regs);
> > + case X86_TRAP_DF: return exc_double_fault(regs, error_code);
> > + case X86_TRAP_TS: return exc_invalid_tss(regs, error_code);
> > + case X86_TRAP_NP: return exc_segment_not_present(regs, error_code);
> > + case X86_TRAP_SS: return exc_stack_segment(regs, error_code);
> > + case X86_TRAP_GP: return exc_general_protection(regs, error_code);
> > + case X86_TRAP_MF: return exc_coprocessor_error(regs);
> > + case X86_TRAP_AC: return exc_alignment_check(regs, error_code);
> > + case X86_TRAP_XF: return exc_simd_coprocessor_error(regs);
> > +
> > +#ifdef CONFIG_X86_MCE
> > + case X86_TRAP_MC: return fred_exc_machine_check(regs);
> > +#endif
> > +#ifdef CONFIG_INTEL_TDX_GUEST
> > + case X86_TRAP_VE: return exc_virtualization_exception(regs);
> > +#endif
> > +#ifdef CONFIG_X86_KERNEL_IBT
>
> CONFIG_X86_CET
>
> Userspace can use CET even if the kernel isn't compiled with IBT, so this
> exception needs handling.

Absolutely correct!

>
> > + case X86_TRAP_CP: return exc_control_protection(regs, error_code);
> > +#endif
> > + default: return fred_bad_type(regs, error_code);
> > + }
> > +}
> > +
> > +__visible noinstr void fred_entry_from_user(struct pt_regs *regs) {
> > + unsigned long error_code = regs->orig_ax;
> > +
> > + /* Invalidate orig_ax so that syscall_get_nr() works correctly */
> > + regs->orig_ax = -1;
> > +
> > + switch (regs->fred_ss.type) {
> > + case EVENT_TYPE_EXTINT:
> > + return fred_extint(regs);
> > + case EVENT_TYPE_NMI:
> > + return fred_exc_nmi(regs);
> > + case EVENT_TYPE_SWINT:
> > + return fred_intx(regs);
> > + case EVENT_TYPE_HWEXC:
> > + case EVENT_TYPE_SWEXC:
> > + case EVENT_TYPE_PRIV_SWEXC:
> > + return fred_exception(regs, error_code);
>
> PRIV_SWEXC should have its own function and not fall into fred_exception().
>
> It is strictly only the ICEBP (INT1) instruction at the moment, so should fall into
> bad_type() for any vector other than X86_TRAP_DB.

Good point!

It's like NMI: one event type with only one valid event vector for now.

>
> > + case EVENT_TYPE_OTHER:
> > + return fred_other(regs);
> > + default:
> > + return fred_bad_type(regs, error_code);
> > + }
> > +}
>
> ~Andrew

Thanks!
Xin