[PATCH RFC 04/10] KVM: VMX: Introduce the SPPTP and SPP page table.

From: Zhang Yi
Date: Fri Oct 13 2017 - 10:27:58 EST


From: Zhang Yi Z <yi.z.zhang@xxxxxxxxxxxxxxx>

The SPPT has a 4-level paging structure similar to EPT, except for
its L1 entries (L1E).

The sub-page permission table is referenced via a 64-bit control field
called the Sub-Page Permission Table Pointer (SPPTP), which contains a
4K-aligned physical address. The encoding for this VMCS field is
defined as 0x2030 at this time.

The format of the SPPTP is shown in the table below:

---------------------------------------------------------------------
| Bit    | Contents                                                 |
|--------|----------------------------------------------------------|
| 11:0   | Reserved (0)                                             |
| N-1:12 | Physical address of 4KB-aligned SPPT L4E table           |
| 51:N   | Reserved (0)                                             |
| 63:52  | Reserved (0)                                             |
---------------------------------------------------------------------

Note: N is the physical address width supported by the processor.
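
Since every non-address bit of the SPPTP is reserved as zero, composing
the field amounts to masking the SPPT root's physical address to a 4KB
boundary. A minimal stand-alone sketch (the SPPTP_* and make_spptp
names are illustrative, not from this patch; it mirrors the
construct_spptp() helper added to vmx.c below):

#include <stdint.h>

#define SPPTP_PAGE_SHIFT 12
#define SPPTP_PAGE_MASK  (~((1ULL << SPPTP_PAGE_SHIFT) - 1))

/*
 * Build an SPPTP value from the host physical address of the SPPT
 * root (the L4E table). Bits 11:0 are reserved (0), so a 4K-aligned
 * address only needs its low bits masked; a valid HPA never sets the
 * high reserved bits, and no control bits are defined for this field
 * at this time.
 */
static inline uint64_t make_spptp(uint64_t sppt_root_hpa)
{
	return sppt_root_hpa & SPPTP_PAGE_MASK;
}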

This patch introduces the SPP paging structures; the SPPT root page
is created at KVM MMU page initialization and freed when the MMU
pages are freed.

As with the EPT page table, we initialize the SPPT and write the
SPPT pointer into the corresponding VMCS field. We also add an MMU
page role bit, spp, to distinguish whether a page is an SPP page or
an EPT page, as sketched below.
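
Because the new bit participates in kvm_mmu_page_role, an SPPT page
and an EPT shadow page for the same gfn carry different role.word
values and are kept distinct on the mmu_page_hash lists. A minimal
sketch of how in-tree code might test the bit (a hypothetical helper,
not part of this patch):

/*
 * Hypothetical helper: returns true if @sp backs the SPP table
 * rather than an EPT shadow page, based on the new role.spp bit.
 */
static inline bool sp_is_spp(struct kvm_mmu_page *sp)
{
	return sp->role.spp;
}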

Signed-off-by: Zhang Yi Z <yi.z.zhang@xxxxxxxxxxxxxxx>
Signed-off-by: He Chen <he.chen@xxxxxxxxxxxxxxx>
---
arch/x86/include/asm/kvm_host.h | 4 +++-
arch/x86/include/asm/vmx.h | 2 ++
arch/x86/kvm/mmu.c | 39 +++++++++++++++++++++++++++++++++++++--
arch/x86/kvm/vmx.c | 16 ++++++++++++++++
4 files changed, 58 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c73e493..5e8fdda 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -256,7 +256,8 @@ union kvm_mmu_page_role {
unsigned smep_andnot_wp:1;
unsigned smap_andnot_wp:1;
unsigned ad_disabled:1;
- unsigned :7;
+ unsigned spp:1;
+ unsigned reserved:6;

/*
* This is left at the top of the word so that
@@ -345,6 +346,7 @@ struct kvm_mmu {
void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
u64 *spte, const void *pte);
hpa_t root_hpa;
+ hpa_t sppt_root;
union kvm_mmu_page_role base_role;
u8 root_level;
u8 shadow_root_level;
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 633dff5..55bac23 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -211,6 +211,8 @@ enum vmcs_field {
VMWRITE_BITMAP = 0x00002028,
XSS_EXIT_BITMAP = 0x0000202C,
XSS_EXIT_BITMAP_HIGH = 0x0000202D,
+ SPPT_POINTER = 0x00002030,
+ SPPT_POINTER_HIGH = 0x00002031,
TSC_MULTIPLIER = 0x00002032,
TSC_MULTIPLIER_HIGH = 0x00002033,
GUEST_PHYSICAL_ADDRESS = 0x00002400,
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index eca30c1..32a374c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2287,6 +2287,27 @@ static void clear_sp_write_flooding_count(u64 *spte)
__clear_sp_write_flooding_count(sp);
}

+static struct kvm_mmu_page *kvm_mmu_get_spp_page(struct kvm_vcpu *vcpu,
+ gfn_t gfn,
+ unsigned level)
+{
+ struct kvm_mmu_page *sp;
+ union kvm_mmu_page_role role;
+
+ role = vcpu->arch.mmu.base_role;
+ role.level = level;
+ role.direct = true;
+ role.spp = true;
+
+ sp = kvm_mmu_alloc_page(vcpu, true);
+ sp->gfn = gfn;
+ sp->role = role;
+ hlist_add_head(&sp->hash_link,
+ &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]);
+ clear_page(sp->spt);
+ return sp;
+}
+
static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
gfn_t gfn,
gva_t gaddr,
@@ -3319,7 +3341,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
static void mmu_free_roots(struct kvm_vcpu *vcpu)
{
int i;
- struct kvm_mmu_page *sp;
+ struct kvm_mmu_page *sp, *spp_sp;
LIST_HEAD(invalid_list);

if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
@@ -3329,16 +3351,24 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
(vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL ||
vcpu->arch.mmu.direct_map)) {
hpa_t root = vcpu->arch.mmu.root_hpa;
+ hpa_t spp_root = vcpu->arch.mmu.sppt_root;

spin_lock(&vcpu->kvm->mmu_lock);
sp = page_header(root);
+ spp_sp = page_header(spp_root);
--sp->root_count;
+ --spp_sp->root_count;
if (!sp->root_count && sp->role.invalid) {
kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list);
kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
}
+ if (!spp_sp->root_count && spp_sp->role.invalid) {
+ kvm_mmu_prepare_zap_page(vcpu->kvm, spp_sp, &invalid_list);
+ kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
+ }
spin_unlock(&vcpu->kvm->mmu_lock);
vcpu->arch.mmu.root_hpa = INVALID_PAGE;
+ vcpu->arch.mmu.sppt_root = INVALID_PAGE;
return;
}

@@ -3375,7 +3405,7 @@ static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn)

static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
{
- struct kvm_mmu_page *sp;
+ struct kvm_mmu_page *sp, *spp_sp;
unsigned i;

if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL) {
@@ -3386,9 +3416,13 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
}
sp = kvm_mmu_get_page(vcpu, 0, 0,
vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
+ spp_sp = kvm_mmu_get_spp_page(vcpu, 0,
+ vcpu->arch.mmu.shadow_root_level);
++sp->root_count;
+ ++spp_sp->root_count;
spin_unlock(&vcpu->kvm->mmu_lock);
vcpu->arch.mmu.root_hpa = __pa(sp->spt);
+ vcpu->arch.mmu.sppt_root = __pa(spp_sp->spt);
} else if (vcpu->arch.mmu.shadow_root_level == PT32E_ROOT_LEVEL) {
for (i = 0; i < 4; ++i) {
hpa_t root = vcpu->arch.mmu.pae_root[i];
@@ -5021,6 +5055,7 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
{
vcpu->arch.walk_mmu = &vcpu->arch.mmu;
vcpu->arch.mmu.root_hpa = INVALID_PAGE;
+ vcpu->arch.mmu.sppt_root = INVALID_PAGE;
vcpu->arch.mmu.translate_gpa = translate_gpa;
vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1a2ca87..a4ac08a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -253,6 +253,7 @@ struct __packed vmcs12 {
u64 eoi_exit_bitmap3;
u64 eptp_list_address;
u64 xss_exit_bitmap;
+ u64 sppt_pointer;
u64 guest_physical_address;
u64 vmcs_link_pointer;
u64 pml_address;
@@ -775,6 +776,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3),
FIELD64(EPTP_LIST_ADDRESS, eptp_list_address),
FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
+ FIELD64(SPPT_POINTER, sppt_pointer),
FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
FIELD64(PML_ADDRESS, pml_address),
@@ -4323,10 +4325,16 @@ static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
return eptp;
}

+static inline u64 construct_spptp(unsigned long root_hpa)
+{
+ return root_hpa & PAGE_MASK;
+}
+
static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
unsigned long guest_cr3;
u64 eptp;
+ u64 spptp;

guest_cr3 = cr3;
if (enable_ept) {
@@ -4339,6 +4347,12 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
ept_load_pdptrs(vcpu);
}

+ if ((vcpu->arch.mmu.sppt_root != INVALID_PAGE) &&
+ enable_ept_spp) {
+ spptp = construct_spptp(vcpu->arch.mmu.sppt_root);
+ vmcs_write64(SPPT_POINTER, spptp);
+ }
+
vmx_flush_tlb(vcpu);
vmcs_writel(GUEST_CR3, guest_cr3);
}
@@ -8754,6 +8768,8 @@ static void dump_vmcs(void)
pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV));
if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT))
pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER));
+ if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_SPP))
+ pr_err("SPPT pointer = 0x%016llx\n", vmcs_read64(SPPT_POINTER));
n = vmcs_read32(CR3_TARGET_COUNT);
for (i = 0; i + 1 < n; i += 4)
pr_err("CR3 target%u=%016lx target%u=%016lx\n",
--
2.7.4