Re: [PATCH v5 0/8] TDX host: metadata reading tweaks, bug fix and info dump

From: Paolo Bonzini
Date: Tue Oct 15 2024 - 12:30:12 EST


On Tue, Oct 15, 2024 at 5:30 PM Dave Hansen <dave.hansen@xxxxxxxxx> wrote:
>
> I'm having one of those "I hate this all" moments. Look at what we say
> in the code:
>
> > * See the "global_metadata.json" in the "TDX 1.5 ABI definitions".
>
> Basically step one in verifying that this is all right is: Hey, humans,
> please go parse a machine-readable format. That's insanity. If Intel
> wants to publish JSON as the canonical source of truth, that's fine.
> It's great, actually. But let's stop playing human JSON parser and make
> the computers do it for us, OK?
>
> Let's just generate the code. Basically, as long as the generated C is
> marginally readable, I'm OK with it. The most important things are:
>
> 1. Adding a field is dirt simple
> 2. Using the generated C is simple
>
> In 99% of the cases, nobody ends up having to ever look at the generated
> code.
>
> Take a look at the attached python program and generated C file. I
> think they qualify. We can check the script into tools/scripts/ and it
> can get re-run when new json comes out or when a new field is needed.
> You'd could call the generated code like this:

Ok, so let's move this thing forward. Here is a more polished script
and the output. Untested beyond compilation.

Kai, feel free to include it in v6 with my

Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxx>

I made an attempt at adding array support and using it with the CMR
information, just to see whether Intel is actually trying to keep
global_metadata.json accurate. The original code has

for (i = 0; i < sysinfo_cmr->num_cmrs; i++) {
READ_SYS_INFO(CMR_BASE + i, cmr_base[i]);
READ_SYS_INFO(CMR_SIZE + i, cmr_size[i]);
}

The generated code instead always tries to read 32 fields and returns
non-zero from get_tdx_sys_info_cmr if they are missing. If it fails to
read the fields above NUM_CMRS, just remove that part of the tdx.py
script and make sure that a comment in the code shames the TDX ABI
documentation adequately. :)

Thanks,

Paolo
/* Automatically generated by tdx.py */

/*
 * Populate a struct tdx_sys_info_version, one member per metadata field.
 *
 * Every member is fetched with read_sys_metadata_field().  The first
 * non-zero return value ends the sequence: it is handed back to the
 * caller and the members that have not been read yet are left untouched.
 *
 * Return: 0 if every field was read, otherwise the first non-zero value
 * returned by read_sys_metadata_field().
 */
static int get_tdx_sys_info_version(struct tdx_sys_info_version *out)
{
	u64 val;
	int ret;

	ret = read_sys_metadata_field(0x8800000200000001, &val);
	if (ret)
		return ret;
	out->build_date = val;

	ret = read_sys_metadata_field(0x8800000100000002, &val);
	if (ret)
		return ret;
	out->build_num = val;

	ret = read_sys_metadata_field(0x0800000100000003, &val);
	if (ret)
		return ret;
	out->minor_version = val;

	ret = read_sys_metadata_field(0x0800000100000004, &val);
	if (ret)
		return ret;
	out->major_version = val;

	ret = read_sys_metadata_field(0x0800000100000005, &val);
	if (ret)
		return ret;
	out->update_version = val;

	ret = read_sys_metadata_field(0x0800000100000006, &val);
	if (ret)
		return ret;
	out->internal_version = val;

	return 0;
}

/*
 * Populate a struct tdx_sys_info_features from its single metadata field.
 *
 * Return: 0 if the field was read, otherwise the non-zero value returned
 * by read_sys_metadata_field(); @out is not written in that case.
 */
static int get_tdx_sys_info_features(struct tdx_sys_info_features *out)
{
	u64 val;
	int ret;

	ret = read_sys_metadata_field(0x0A00000300000008, &val);
	if (ret)
		return ret;
	out->tdx_features0 = val;

	return 0;
}

/*
 * Populate a struct tdx_sys_info_tdmr, one member per metadata field.
 *
 * Every member is fetched with read_sys_metadata_field().  The first
 * non-zero return value ends the sequence: it is handed back to the
 * caller and the members that have not been read yet are left untouched.
 *
 * Return: 0 if every field was read, otherwise the first non-zero value
 * returned by read_sys_metadata_field().
 */
static int get_tdx_sys_info_tdmr(struct tdx_sys_info_tdmr *out)
{
	u64 val;
	int ret;

	ret = read_sys_metadata_field(0x9100000100000008, &val);
	if (ret)
		return ret;
	out->max_tdmrs = val;

	ret = read_sys_metadata_field(0x9100000100000009, &val);
	if (ret)
		return ret;
	out->max_reserved_per_tdmr = val;

	ret = read_sys_metadata_field(0x9100000100000010, &val);
	if (ret)
		return ret;
	out->pamt_4k_entry_size = val;

	ret = read_sys_metadata_field(0x9100000100000011, &val);
	if (ret)
		return ret;
	out->pamt_2m_entry_size = val;

	ret = read_sys_metadata_field(0x9100000100000012, &val);
	if (ret)
		return ret;
	out->pamt_1g_entry_size = val;

	return 0;
}

/*
 * Populate a struct tdx_sys_info_cmr: the CMR count followed by all 32
 * base and all 32 size entries.
 *
 * Both arrays are always read in full (32 elements each), regardless of
 * the value stored in num_cmrs.  Element i is read from the array's base
 * field ID plus i.  The first non-zero return value from
 * read_sys_metadata_field() ends the sequence and is handed back to the
 * caller; anything not read yet is left untouched.
 *
 * Return: 0 if every field was read, otherwise the first non-zero value
 * returned by read_sys_metadata_field().
 */
static int get_tdx_sys_info_cmr(struct tdx_sys_info_cmr *out)
{
	u64 val;
	int ret;
	int i;

	ret = read_sys_metadata_field(0x9000000100000000, &val);
	if (ret)
		return ret;
	out->num_cmrs = val;

	for (i = 0; i < 32; i++) {
		ret = read_sys_metadata_field(0x9000000300000080 + i, &val);
		if (ret)
			return ret;
		out->cmr_base[i] = val;
	}

	for (i = 0; i < 32; i++) {
		ret = read_sys_metadata_field(0x9000000300000100 + i, &val);
		if (ret)
			return ret;
		out->cmr_size[i] = val;
	}

	return 0;
}
/* Automatically generated by tdx.py */
#ifndef TDX_DATA_GENERATED_H
#define TDX_DATA_GENERATED_H 1

/*
 * Filled in by get_tdx_sys_info_version().  Each member holds the value
 * read from the metadata field ID noted above it, narrowed from the u64
 * that read_sys_metadata_field() produces to the member's declared width.
 */
struct tdx_sys_info_version {
	/* field ID 0x8800000200000001 */
	u32 build_date;
	/* field ID 0x8800000100000002 */
	u16 build_num;
	/* field ID 0x0800000100000003 */
	u16 minor_version;
	/* field ID 0x0800000100000004 */
	u16 major_version;
	/* field ID 0x0800000100000005 */
	u16 update_version;
	/* field ID 0x0800000100000006 */
	u16 internal_version;
};

/* Filled in by get_tdx_sys_info_features(). */
struct tdx_sys_info_features {
	/* field ID 0x0A00000300000008 */
	u64 tdx_features0;
};

/*
 * Filled in by get_tdx_sys_info_tdmr().  Each member holds the value
 * read from the metadata field ID noted above it, narrowed from the u64
 * that read_sys_metadata_field() produces to the member's declared width.
 */
struct tdx_sys_info_tdmr {
	/* field ID 0x9100000100000008 */
	u16 max_tdmrs;
	/* field ID 0x9100000100000009 */
	u16 max_reserved_per_tdmr;
	/* field ID 0x9100000100000010 */
	u16 pamt_4k_entry_size;
	/* field ID 0x9100000100000011 */
	u16 pamt_2m_entry_size;
	/* field ID 0x9100000100000012 */
	u16 pamt_1g_entry_size;
};

/*
 * Filled in by get_tdx_sys_info_cmr(), which reads num_cmrs and then
 * all 32 elements of each array, independent of the num_cmrs value.
 */
struct tdx_sys_info_cmr {
	/* field ID 0x9000000100000000 */
	u16 num_cmrs;
	/* element i is read from field ID 0x9000000300000080 + i */
	u64 cmr_base[32];
	/* element i is read from field ID 0x9000000300000100 + i */
	u64 cmr_size[32];
};

#endif
#! /usr/bin/env python3
import json
import sys

# Note: this script does not run as part of the build process.
# It is used to generate structs from the TDX global_metadata.json
# file, and functions to fill in said structs. Rerun it if
# you need more fields.

# Maps the name of each generated C struct to the metadata "Field Name"
# entries that become its members.  Both the order of the structs and the
# order of the names inside each list are carried into the generated
# header and C file.
TDX_STRUCTS = {
    "tdx_sys_info_version": [
        "BUILD_DATE",
        "BUILD_NUM",
        "MINOR_VERSION",
        "MAJOR_VERSION",
        "UPDATE_VERSION",
        "INTERNAL_VERSION",
    ],
    "tdx_sys_info_features": [
        "TDX_FEATURES0",
    ],
    "tdx_sys_info_tdmr": [
        "MAX_TDMRS",
        "MAX_RESERVED_PER_TDMR",
        "PAMT_4K_ENTRY_SIZE",
        "PAMT_2M_ENTRY_SIZE",
        "PAMT_1G_ENTRY_SIZE",
    ],
    "tdx_sys_info_cmr": [
        "NUM_CMRS",
        "CMR_BASE",
        "CMR_SIZE",
    ],
}


def print_struct(name, fields, file):
    """Write the C definition of ``struct <name>`` to *file*.

    Args:
        name: Name of the C struct to declare.
        fields: Iterable of metadata field descriptions.  Each one is a
            mapping providing "Field Name", "Element Size (Bytes)" and
            "Num Fields"; the latter two are converted with ``int()``.
        file: Text stream the declaration is printed to.

    Every description becomes one tab-indented member named after the
    lower-cased field name, typed ``u<bits>`` with bits = 8 * element size.
    A "Num Fields" of exactly 1 gives a scalar member; any other value
    gives an array of that many elements.
    """
    print("struct %s {" % (name,), file=file)
    for desc in fields:
        raw_name = desc["Field Name"]
        width = 8 * int(desc["Element Size (Bytes)"])
        count = int(desc["Num Fields"])
        if count != 1:
            member_decl = "\tu%d %s[%d];" % (width, raw_name.lower(), count)
        else:
            member_decl = "\tu%d %s;" % (width, raw_name.lower())
        print(member_decl, file=file)
    print("};", file=file)


def print_field(number, member, indent, file):
    """Write one C statement that reads a metadata field into ``out->member``.

    Args:
        number: C expression for the field ID, inserted verbatim as the
            first argument of ``read_sys_metadata_field()``.
        member: Member expression placed after ``out->``.
        indent: Leading whitespace for the emitted line.
        file: Text stream the statement is printed to.

    The emitted statement only performs the read while ``ret`` is still
    zero, and only stores ``val`` when the read itself returned zero.
    """
    template = "%sif (!ret && !(ret = read_sys_metadata_field(%s, &val))) out->%s = val;"
    statement = template % (indent, number, member)
    print(statement, file=file)


def print_function(name, fields, file):
    """Write the C function ``get_<name>()`` that fills ``struct <name>``.

    Args:
        name: Struct name; the function is emitted as
            ``static int get_<name>(struct <name> *out)``.
        fields: Sequence of metadata field descriptions.  Each one is a
            mapping providing "Field Name", "Base FIELD_ID (Hex)" and
            "Num Fields"; it is walked twice, so it must be re-iterable.
        file: Text stream the function is printed to.

    A description whose "Num Fields" is exactly 1 becomes a single read
    statement.  Any other value becomes a ``for`` loop over that many
    elements, reading field ID ``base + i`` into ``member[i]``.  The loop
    counter ``i`` is declared only when some description has more than
    one element.
    """
    print("static int get_%s(struct %s *out)" % (name, name), file=file)
    print("{", file=file)

    # Local declarations of the generated function.
    print("\tint ret = 0;", file=file)
    print("\tu64 val;", file=file)
    if any(int(desc["Num Fields"]) > 1 for desc in fields):
        print("\tint i;", file=file)

    print(file=file)
    for desc in fields:
        count = int(desc["Num Fields"])
        if count != 1:
            # Array field: the loop header goes out first, then the read
            # statement is emitted one level deeper as the loop body.
            print("\tfor (i = 0; i < %d; i++)" % (count,), file=file)
            number = desc["Base FIELD_ID (Hex)"] + " + i"
            member = desc["Field Name"].lower() + "[i]"
            pad = "\t\t"
        else:
            number = desc["Base FIELD_ID (Hex)"]
            member = desc["Field Name"].lower()
            pad = "\t"
        print_field(number, member, indent=pad, file=file)

    print(file=file)
    print("\treturn ret;", file=file)
    print("}", file=file)


# Command line: tdx.py <global_metadata.json> <output header> <output C file>
# Fail with a usage message instead of a bare IndexError traceback when
# an argument is missing.  Extra arguments are still ignored.
if len(sys.argv) < 4:
    sys.exit("usage: %s <global_metadata.json> <out.h> <out.c>" % (sys.argv[0],))

jsonfile = sys.argv[1]
hfile = sys.argv[2]
cfile = sys.argv[3]

with open(jsonfile, "r", encoding="utf-8") as f:
    json_in = json.load(f)
# Index the field descriptions by name for lookup from TDX_STRUCTS.
fields = {x["Field Name"]: x for x in json_in["Fields"]}

# Resolve every requested field before any output file is opened.  A name
# missing from the JSON still raises KeyError, but no longer leaves a
# truncated header or C file behind.
structs = {
    name: [fields[x] for x in field_names]
    for name, field_names in TDX_STRUCTS.items()
}

with open(hfile, "w", encoding="utf-8") as f:
    print("/* Automatically generated by tdx.py */", file=f)
    print("#ifndef TDX_DATA_GENERATED_H", file=f)
    print("#define TDX_DATA_GENERATED_H 1", file=f)
    for name, struct_fields in structs.items():
        print(file=f)
        print_struct(name, struct_fields, file=f)
    print(file=f)
    print("#endif", file=f)

with open(cfile, "w", encoding="utf-8") as f:
    print("/* Automatically generated by tdx.py */", file=f)
    for name, struct_fields in structs.items():
        print(file=f)
        print_function(name, struct_fields, file=f)