Re: [RFC v2-fix 1/1] x86/tdx: Handle in-kernel MMIO

From: Kirill A. Shutemov
Date: Thu May 20 2021 - 18:47:53 EST


On Tue, May 18, 2021 at 06:17:04PM +0000, Sean Christopherson wrote:
> On Tue, May 18, 2021, Andi Kleen wrote:
> > > Why does this code exist at all? TDX and SEV-ES absolutely must share code for
> > > handling MMIO reflection. It will require a fair amount of refactoring to move
> > > the guts of vc_handle_mmio() to common code, but there is zero reason to maintain
> > > two separate versions of the opcode cracking.
> >
> > While that's true on the high level, all the low level details are
> > different. We looked at unifying at some point, but it would have been a
> > callback hell. I don't think unifying would make anything cleaner.
>
> How hard did you look? The only part that _must_ be different between SEV and
> TDX is the hypercall itself, which is wholly contained at the very end of
> vc_do_mmio().

I've come up with the code below. decode_mmio() can be shared with SEV.

I don't have a testing setup for AMD. I can do a blind patch, but it would
be much more productive if someone on AMD side could look into this.

Any opinions?

enum mmio_type {
MMIO_DECODE_FAILED,
MMIO_WRITE,
MMIO_WRITE_IMM,
MMIO_READ,
MMIO_READ_ZERO_EXTEND,
MMIO_READ_SIGN_EXTEND,
MMIO_MOVS,
};

static enum mmio_type decode_mmio(struct insn *insn, struct pt_regs *regs,
int *bytes)
{
int type = MMIO_DECODE_FAILED;

*bytes = 0;

switch (insn->opcode.bytes[0]) {
case 0x88: /* MOV m8,r8 */
*bytes = 1;
fallthrough;
case 0x89: /* MOV m16/m32/m64, r16/m32/m64 */
if (!*bytes)
*bytes = insn->opnd_bytes;
type = MMIO_WRITE;
break;

case 0xc6: /* MOV m8, imm8 */
*bytes = 1;
fallthrough;
case 0xc7: /* MOV m16/m32/m64, imm16/imm32/imm64 */
if (!*bytes)
*bytes = insn->opnd_bytes;
type = MMIO_WRITE_IMM;
break;

case 0x8a: /* MOV r8, m8 */
*bytes = 1;
fallthrough;
case 0x8b: /* MOV r16/r32/r64, m16/m32/m64 */
if (!*bytes)
*bytes = insn->opnd_bytes;
type = MMIO_READ;
break;

case 0xa4: /* MOVS m8, m8 */
*bytes = 1;
fallthrough;
case 0xa5: /* MOVS m16/m32/m64, m16/m32/m64 */
if (!*bytes)
*bytes = insn->opnd_bytes;
type = MMIO_MOVS;
break;

case 0x0f: /* Two-byte instruction */
switch (insn->opcode.bytes[1]) {
case 0xb6: /* MOVZX r16/r32/r64, m8 */
*bytes = 1;
fallthrough;
case 0xb7: /* MOVZX r32/r64, m16 */
if (!*bytes)
*bytes = 2;
type = MMIO_READ_ZERO_EXTEND;
break;

case 0xbe: /* MOVSX r16/r32/r64, m8 */
*bytes = 1;
fallthrough;
case 0xbf: /* MOVSX r32/r64, m16 */
if (!*bytes)
*bytes = 2;
type = MMIO_READ_SIGN_EXTEND;
break;
}
break;
}

return type;
}

static int tdg_handle_mmio(struct pt_regs *regs, struct ve_info *ve)
{
int size;
unsigned long *reg;
struct insn insn;
unsigned long val = 0;

kernel_insn_init(&insn, (void *) regs->ip, MAX_INSN_SIZE);
insn_get_length(&insn);
insn_get_opcode(&insn);

reg = get_reg_ptr(&insn, regs);

switch (decode_mmio(&insn, regs, &size)) {
case MMIO_WRITE:
memcpy(&val, reg, size);
tdg_mmio(size, true, ve->gpa, val);
break;
case MMIO_WRITE_IMM:
val = insn.immediate.value;
tdg_mmio(size, true, ve->gpa, val);
break;
case MMIO_READ:
val = tdg_mmio(size, false, ve->gpa, val);
/* Zero-extend for 32-bit operation */
if (size == 4)
*reg = 0;
memcpy(reg, &val, size);
break;
case MMIO_READ_ZERO_EXTEND:
val = tdg_mmio(size, false, ve->gpa, val);

/* Zero extend based on operand size */
memset(reg, 0, insn.opnd_bytes);
memcpy(reg, &val, size);
break;
case MMIO_READ_SIGN_EXTEND:
val = tdg_mmio(size, false, ve->gpa, val);

/* Sign extend based on operand size */
if (val & (size == 1 ? 0x80 : 0x8000))
memset(reg, 0xff, insn.opnd_bytes);
else
memset(reg, 0, insn.opnd_bytes);
memcpy(reg, &val, size);
break;
case MMIO_MOVS:
case MMIO_DECODE_FAILED:
force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *) ve->gla);
return 0;
}

return insn.length;
}

--
Kirill A. Shutemov