Re: [RFC PATCH v1 02/10] KVM: s390: Honor storage keys when accessing guest memory

From: Christian Borntraeger
Date: Thu Jan 20 2022 - 04:07:04 EST




Am 20.01.22 um 09:58 schrieb Janis Schoetterl-Glausch:
On 1/20/22 09:50, Christian Borntraeger wrote:


Am 20.01.22 um 09:11 schrieb Janis Schoetterl-Glausch:
On 1/19/22 20:27, Christian Borntraeger wrote:
Am 18.01.22 um 10:52 schrieb Janis Schoetterl-Glausch:
Storage key checking had not been implemented for instructions emulated
by KVM. Implement it by enhancing the functions used for guest access,
in particular those making use of access_guest which has been renamed
to access_guest_with_key.
Accesses via access_guest_real should not be key checked.

For actual accesses, key checking is done by __copy_from/to_user_with_key
(which internally uses MVCOS/MVCP/MVCS).
In cases where accessibility is checked without an actual access,
this is performed by getting the storage key and checking
if the access key matches.
In both cases, if applicable, storage and fetch protection override
are honored.

Signed-off-by: Janis Schoetterl-Glausch <scgl@xxxxxxxxxxxxx>
---
   arch/s390/include/asm/ctl_reg.h |   2 +
   arch/s390/include/asm/page.h    |   2 +
   arch/s390/kvm/gaccess.c         | 174 +++++++++++++++++++++++++++++---
   arch/s390/kvm/gaccess.h         |  78 ++++++++++++--
   arch/s390/kvm/intercept.c       |  12 +--
   arch/s390/kvm/kvm-s390.c        |   4 +-
   6 files changed, 241 insertions(+), 31 deletions(-)

diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index 04dc65f8901d..c800199a376b 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -12,6 +12,8 @@
     #define CR0_CLOCK_COMPARATOR_SIGN    BIT(63 - 10)
   #define CR0_LOW_ADDRESS_PROTECTION    BIT(63 - 35)
+#define CR0_FETCH_PROTECTION_OVERRIDE    BIT(63 - 38)
+#define CR0_STORAGE_PROTECTION_OVERRIDE    BIT(63 - 39)
   #define CR0_EMERGENCY_SIGNAL_SUBMASK    BIT(63 - 49)
   #define CR0_EXTERNAL_CALL_SUBMASK    BIT(63 - 50)
   #define CR0_CLOCK_COMPARATOR_SUBMASK    BIT(63 - 52)
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index d98d17a36c7b..cfc4d6fb2385 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -20,6 +20,8 @@
   #define PAGE_SIZE    _PAGE_SIZE
   #define PAGE_MASK    _PAGE_MASK
   #define PAGE_DEFAULT_ACC    0
+/* storage-protection override */
+#define PAGE_SPO_ACC        9
   #define PAGE_DEFAULT_KEY    (PAGE_DEFAULT_ACC << 4)
     #define HPAGE_SHIFT    20
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 4460808c3b9a..92ab96d55504 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -10,6 +10,7 @@
   #include <linux/mm_types.h>
   #include <linux/err.h>
   #include <linux/pgtable.h>
+#include <linux/bitfield.h>
     #include <asm/gmap.h>
   #include "kvm-s390.h"
@@ -794,6 +795,79 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
       return 1;
   }
   +static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode,
+                       union asce asce)
+{
+    psw_t *psw = &vcpu->arch.sie_block->gpsw;
+    unsigned long override;
+
+    if (mode == GACC_FETCH || mode == GACC_IFETCH) {
+        /* check if fetch protection override enabled */
+        override = vcpu->arch.sie_block->gcr[0];
+        override &= CR0_FETCH_PROTECTION_OVERRIDE;
+        /* not applicable if subject to DAT && private space */
+        override = override && !(psw_bits(*psw).dat && asce.p);
+        return override;
+    }
+    return false;
+}
+
+static bool fetch_prot_override_applies(unsigned long ga, unsigned int len)
+{
+    return ga < 2048 && ga + len <= 2048;
+}
+
+static bool storage_prot_override_applicable(struct kvm_vcpu *vcpu)
+{
+    /* check if storage protection override enabled */
+    return vcpu->arch.sie_block->gcr[0] & CR0_STORAGE_PROTECTION_OVERRIDE;
+}
+
+static bool storage_prot_override_applies(char access_control)
+{
+    /* matches special storage protection override key (9) -> allow */
+    return access_control == PAGE_SPO_ACC;
+}
+
+static int vcpu_check_access_key(struct kvm_vcpu *vcpu, char access_key,
+                 enum gacc_mode mode, union asce asce, gpa_t gpa,
+                 unsigned long ga, unsigned int len)
+{
+    unsigned char storage_key, access_control;
+    unsigned long hva;
+    int r;
+
+    /* access key 0 matches any storage key -> allow */
+    if (access_key == 0)
+        return 0;
+    /*
+     * caller needs to ensure that gfn is accessible, so we can
+     * assume that this cannot fail
+     */
+    hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gpa));
+    mmap_read_lock(current->mm);
+    r = get_guest_storage_key(current->mm, hva, &storage_key);
+    mmap_read_unlock(current->mm);
+    if (r)
+        return r;
+    access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key);
+    /* access key matches storage key -> allow */
+    if (access_control == access_key)
+        return 0;
+    if (mode == GACC_FETCH || mode == GACC_IFETCH) {
+        /* mismatching keys, no fetch protection -> allowed */
+        if (!(storage_key & _PAGE_FP_BIT))
+            return 0;
+        if (fetch_prot_override_applicable(vcpu, mode, asce))
+            if (fetch_prot_override_applies(ga, len))
+                return 0;
+    }
+    if (storage_prot_override_applicable(vcpu))
+        if (storage_prot_override_applies(access_control))
+            return 0;
+    return PGM_PROTECTION;
+}

This function is just a pre-check (and early-exit) and we do an additional final check
in the MVCOS routing later on, correct? It might actually be faster to get rid of this

No, this exists for those cases that do not do an actual access, that is MEMOPs with
the check only flag, as well as the TEST PROTECTION emulation. access_guest_with_key
passes key 0 so we take the early return. It's easy to miss so Janosch suggested a comment there.

Dont we always call it in guest_range_to_gpas, which is also called for the memory access in
access_guest_with_key?
@@ -904,16 +1018,37 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
if (!gpas)
return -ENOMEM;
+ try_fetch_prot_override = fetch_prot_override_applicable(vcpu, mode, asce);
+ try_storage_prot_override = storage_prot_override_applicable(vcpu);
need_ipte_lock = psw_bits(*psw).dat && !asce.r;
if (need_ipte_lock)
ipte_lock(vcpu);
- rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode);
- for (idx = 0; idx < nr_pages && !rc; idx++) {
+ rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode, 0);

Yes, but the key is 0 in that case, so we don't do any key checking. ^

So yes, we need a comment :-)