[PATCH v4 0/9] KVM: s390: Extend MEM_OP ioctl by storage key checked cmpxchg

From: Janis Schoetterl-Glausch
Date: Tue Dec 13 2022 - 11:54:37 EST


User space can use the MEM_OP ioctl to make storage key checked reads
and writes to the guest, however, it has no way of performing atomic,
key checked, accesses to the guest.
Extend the MEM_OP ioctl in order to allow for this, by adding a cmpxchg
mode. For now, support this mode for absolute accesses only.

This mode can be use, for example, to set the device-state-change
indicator and the adapter-local-summary indicator atomically.

Also contains some fixes/changes for the memop selftest independent of
the cmpxchg changes.

v3 -> v4
* no functional change intended
* rework documentation a bit
* name extension cap cmpxchg bit
* picked up R-b (thanks Thomas)
* various changes (rename variable, comments, ...) see range-diff below

v2 -> v3
* rebase onto the wip/cmpxchg_user_key branch in the s390 kernel repo
* use __uint128_t instead of unsigned __int128
* put moving of testlist into main into separate patch
* pick up R-b's (thanks Nico)

v1 -> v2
* get rid of xrk instruction for cmpxchg byte and short implementation
* pass old parameter via pointer instead of in mem_op struct
* indicate failure of cmpxchg due to wrong old value by special return
code
* picked up R-b's (thanks Thomas)

Janis Schoetterl-Glausch (9):
KVM: s390: Extend MEM_OP ioctl by storage key checked cmpxchg
Documentation: KVM: s390: Describe KVM_S390_MEMOP_F_CMPXCHG
KVM: s390: selftest: memop: Pass mop_desc via pointer
KVM: s390: selftest: memop: Replace macros by functions
KVM: s390: selftest: memop: Move testlist into main
KVM: s390: selftest: memop: Add cmpxchg tests
KVM: s390: selftest: memop: Add bad address test
KVM: s390: selftest: memop: Fix typo
KVM: s390: selftest: memop: Fix wrong address being used in test

Documentation/virt/kvm/api.rst | 20 +-
include/uapi/linux/kvm.h | 7 +
arch/s390/kvm/gaccess.h | 3 +
arch/s390/kvm/gaccess.c | 102 ++++
arch/s390/kvm/kvm-s390.c | 39 +-
tools/testing/selftests/kvm/s390x/memop.c | 670 +++++++++++++++++-----
6 files changed, 689 insertions(+), 152 deletions(-)

Range-diff against v3:
1: ebb86a0c90a2 ! 1: 75a20d2e27f2 KVM: s390: Extend MEM_OP ioctl by storage key checked cmpxchg
@@ include/uapi/linux/kvm.h: struct kvm_s390_mem_op {
__u8 ar; /* the access register number */
__u8 key; /* access key, ignored if flag unset */
+ __u8 pad1[6]; /* ignored */
-+ __u64 old_p; /* ignored if flag unset */
++ __u64 old_addr; /* ignored if flag unset */
};
__u32 sida_offset; /* offset into the sida */
__u8 reserved[32]; /* ignored */
@@ include/uapi/linux/kvm.h: struct kvm_s390_mem_op {
#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1)
#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2)
+#define KVM_S390_MEMOP_F_CMPXCHG (1ULL << 3)
++/* flags specifying extension support */
++#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG 0x2
+/* Non program exception return codes (pgm codes are 16 bit) */
-+#define KVM_S390_MEMOP_R_NO_XCHG ((1 << 16) + 0)
++#define KVM_S390_MEMOP_R_NO_XCHG (1 << 16)

/* for KVM_INTERRUPT */
struct kvm_interrupt {
@@ arch/s390/kvm/gaccess.c: int access_guest_real(struct kvm_vcpu *vcpu, unsigned l
+ * @gpa: Absolute guest address of the location to be changed.
+ * @len: Operand length of the cmpxchg, required: 1 <= len <= 16. Providing a
+ * non power of two will result in failure.
-+ * @old_p: Pointer to old value. If the location at @gpa contains this value, the
++ * @old_addr: Pointer to old value. If the location at @gpa contains this value, the
+ * exchange will succeed. After calling cmpxchg_guest_abs_with_key() *@old
+ * contains the value at @gpa before the attempt to exchange the value.
+ * @new: The value to place at @gpa.
@@ arch/s390/kvm/gaccess.c: int access_guest_real(struct kvm_vcpu *vcpu, unsigned l
+ * not be attempted
+ * * -EINVAL: address misaligned or len not power of two
+ * * -EAGAIN: transient failure (len 1 or 2)
++ * * -EOPNOTSUPP: read-only memslot (should never occur)
+ */
+int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len,
-+ __uint128_t *old_p, __uint128_t new,
++ __uint128_t *old_addr, __uint128_t new,
+ u8 access_key)
+{
+ gfn_t gfn = gpa >> PAGE_SHIFT;
@@ arch/s390/kvm/gaccess.c: int access_guest_real(struct kvm_vcpu *vcpu, unsigned l
+ case 1: {
+ u8 old;
+
-+ ret = cmpxchg_user_key((u8 *)hva, &old, *old_p, new, access_key);
-+ ret = ret < 0 ? ret : old != *old_p;
-+ *old_p = old;
++ ret = cmpxchg_user_key((u8 *)hva, &old, *old_addr, new, access_key);
++ ret = ret < 0 ? ret : old != *old_addr;
++ *old_addr = old;
+ break;
+ }
+ case 2: {
+ u16 old;
+
-+ ret = cmpxchg_user_key((u16 *)hva, &old, *old_p, new, access_key);
-+ ret = ret < 0 ? ret : old != *old_p;
-+ *old_p = old;
++ ret = cmpxchg_user_key((u16 *)hva, &old, *old_addr, new, access_key);
++ ret = ret < 0 ? ret : old != *old_addr;
++ *old_addr = old;
+ break;
+ }
+ case 4: {
+ u32 old;
+
-+ ret = cmpxchg_user_key((u32 *)hva, &old, *old_p, new, access_key);
-+ ret = ret < 0 ? ret : old != *old_p;
-+ *old_p = old;
++ ret = cmpxchg_user_key((u32 *)hva, &old, *old_addr, new, access_key);
++ ret = ret < 0 ? ret : old != *old_addr;
++ *old_addr = old;
+ break;
+ }
+ case 8: {
+ u64 old;
+
-+ ret = cmpxchg_user_key((u64 *)hva, &old, *old_p, new, access_key);
-+ ret = ret < 0 ? ret : old != *old_p;
-+ *old_p = old;
++ ret = cmpxchg_user_key((u64 *)hva, &old, *old_addr, new, access_key);
++ ret = ret < 0 ? ret : old != *old_addr;
++ *old_addr = old;
+ break;
+ }
+ case 16: {
+ __uint128_t old;
+
-+ ret = cmpxchg_user_key((__uint128_t *)hva, &old, *old_p, new, access_key);
-+ ret = ret < 0 ? ret : old != *old_p;
-+ *old_p = old;
++ ret = cmpxchg_user_key((__uint128_t *)hva, &old, *old_addr, new, access_key);
++ ret = ret < 0 ? ret : old != *old_addr;
++ *old_addr = old;
+ break;
+ }
+ default:
@@ arch/s390/kvm/kvm-s390.c: int kvm_vm_ioctl_check_extension(struct kvm *kvm, long
+ * The first extension doesn't use a flag, but pretend it does,
+ * this way that can be changed in the future.
+ */
-+ r = 0x3;
++ r = KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG | 1;
+ break;
case KVM_CAP_NR_VCPUS:
case KVM_CAP_MAX_VCPUS:
@@ arch/s390/kvm/kvm-s390.c: static bool access_key_invalid(u8 access_key)
static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
{
void __user *uaddr = (void __user *)mop->buf;
-+ void __user *old_p = (void __user *)mop->old_p;
++ void __user *old_addr = (void __user *)mop->old_addr;
+ union {
+ __uint128_t quad;
+ char raw[sizeof(__uint128_t)];
+ } old = { .quad = 0}, new = { .quad = 0 };
-+ unsigned int off_in_quad = sizeof(__uint128_t) - mop->size;
++ unsigned int off_in_quad = sizeof(new) - mop->size;
u64 supported_flags;
void *tmpbuf = NULL;
int r, srcu_idx;
@@ arch/s390/kvm/kvm-s390.c: static int kvm_s390_vm_mem_op(struct kvm *kvm, struct
mop->key = 0;
}
+ if (mop->flags & KVM_S390_MEMOP_F_CMPXCHG) {
++ /*
++ * This validates off_in_quad. Checking that size is a power
++ * of two is not necessary, as cmpxchg_guest_abs_with_key
++ * takes care of that
++ */
+ if (mop->size > sizeof(new))
+ return -EINVAL;
-+ /* off_in_quad has been validated */
+ if (copy_from_user(&new.raw[off_in_quad], uaddr, mop->size))
+ return -EFAULT;
-+ if (copy_from_user(&old.raw[off_in_quad], old_p, mop->size))
++ if (copy_from_user(&old.raw[off_in_quad], old_addr, mop->size))
+ return -EFAULT;
+ }
if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
@@ arch/s390/kvm/kvm-s390.c: static int kvm_s390_vm_mem_op(struct kvm *kvm, struct
+ &old.quad, new.quad, mop->key);
+ if (r == 1) {
+ r = KVM_S390_MEMOP_R_NO_XCHG;
-+ if (copy_to_user(old_p, &old.raw[off_in_quad], mop->size))
++ if (copy_to_user(old_addr, &old.raw[off_in_quad], mop->size))
+ r = -EFAULT;
+ }
} else {
2: 0cb750e8d182 ! 2: 5c5ad96a4c81 Documentation: KVM: s390: Describe KVM_S390_MEMOP_F_CMPXCHG
@@ Documentation/virt/kvm/api.rst: The fields in each entry are defined as follows:
< 0 on generic error (e.g. -EFAULT or -ENOMEM),
- > 0 if an exception occurred while walking the page tables
+ 16 bit program exception code if the access causes such an exception
-+ other code > maximum 16 bit value with special meaning
++ other code > 0xffff with special meaning

Read or write data from/to the VM's memory.
The KVM_CAP_S390_MEM_OP_EXTENSION capability specifies what functionality is
@@ Documentation/virt/kvm/api.rst: Parameters are specified via the following struc
__u8 ar; /* the access register number */
__u8 key; /* access key, ignored if flag unset */
+ __u8 pad1[6]; /* ignored */
-+ __u64 old_p; /* ignored if flag unset */
++ __u64 old_addr; /* ignored if flag unset */
};
__u32 sida_offset; /* offset into the sida */
__u8 reserved[32]; /* ignored */
@@ Documentation/virt/kvm/api.rst: Absolute accesses are permitted for non-protecte
* ``KVM_S390_MEMOP_F_SKEY_PROTECTION``
+ * ``KVM_S390_MEMOP_F_CMPXCHG``
+
-+The semantics of the flags common with logical acesses are as for logical
++The semantics of the flags common with logical accesses are as for logical
+accesses.
+
-+For write accesses, the KVM_S390_MEMOP_F_CMPXCHG might be supported.
-+In this case, instead of doing an unconditional write, the access occurs only
-+if the target location contains the "size" byte long value pointed to by
-+"old_p". This is performed as an atomic cmpxchg. "size" must be a power of two
-+up to and including 16.
-+The value at the target location is written to the location "old_p" points to.
++For write accesses, the KVM_S390_MEMOP_F_CMPXCHG flag is supported if
++KVM_CAP_S390_MEM_OP_EXTENSION has flag KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG set.
++In this case, instead of doing an unconditional write, the access occurs
++only if the target location contains the value pointed to by "old_addr".
++This is performed as an atomic cmpxchg with the length specified by the "size"
++parameter. "size" must be a power of two up to and including 16.
+If the exchange did not take place because the target value doesn't match the
-+old value KVM_S390_MEMOP_R_NO_XCHG is returned.
-+The KVM_S390_MEMOP_F_CMPXCHG flag is supported if KVM_CAP_S390_MEM_OP_EXTENSION
-+has bit 1 (i.e. bit with value 2) set.
++old value, KVM_S390_MEMOP_R_NO_XCHG is returned.
++In this case the value "old_addr" points to is replaced by the target value.

-The semantics of the flags are as for logical accesses.

3: ecd3f9d6bc9f = 3: 9cbcb313d91d KVM: s390: selftest: memop: Pass mop_desc via pointer
4: 3b7124d69a90 = 4: 21d98b9deaae KVM: s390: selftest: memop: Replace macros by functions
5: c380623abd0d ! 5: 866fcd7fbc97 KVM: s390: selftest: memop: Move testlist into main
@@ Commit message
certain bits are set in the memop extension capability.

Signed-off-by: Janis Schoetterl-Glausch <scgl@xxxxxxxxxxxxx>
+ Reviewed-by: Thomas Huth <thuth@xxxxxxxxxx>

## tools/testing/selftests/kvm/s390x/memop.c ##
@@ tools/testing/selftests/kvm/s390x/memop.c: static void test_errors(void)
6: f4491194821a ! 6: c3e473677786 KVM: s390: selftest: memop: Add cmpxchg tests
@@ tools/testing/selftests/kvm/s390x/memop.c: static struct kvm_s390_mem_op ksmo_fr
}
+ if (desc->old) {
+ ksmo.flags |= KVM_S390_MEMOP_F_CMPXCHG;
-+ ksmo.old_p = (uint64_t)desc->old;
++ ksmo.old_addr = (uint64_t)desc->old;
+ }
if (desc->_ar)
ksmo.ar = desc->ar;
@@ tools/testing/selftests/kvm/s390x/memop.c: static void print_memop(struct kvm_vc
}
- printf("gaddr=%llu, size=%u, buf=%llu, ar=%u, key=%u",
- ksmo->gaddr, ksmo->size, ksmo->buf, ksmo->ar, ksmo->key);
-+ printf("gaddr=%llu, size=%u, buf=%llu, ar=%u, key=%u, old_p=%llx",
++ printf("gaddr=%llu, size=%u, buf=%llu, ar=%u, key=%u, old_addr=%llx",
+ ksmo->gaddr, ksmo->size, ksmo->buf, ksmo->ar, ksmo->key,
-+ ksmo->old_p);
++ ksmo->old_addr);
if (ksmo->flags & KVM_S390_MEMOP_F_CHECK_ONLY)
printf(", CHECK_ONLY");
if (ksmo->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION)
@@ tools/testing/selftests/kvm/s390x/memop.c: static void test_copy_key(void)
+ }
+}
+
-+static bool _cmpxchg(int size, void *target, __uint128_t *old_p, __uint128_t new)
++static bool _cmpxchg(int size, void *target, __uint128_t *old_addr, __uint128_t new)
+{
+ bool ret;
+
+ switch (size) {
+ case 4: {
-+ uint32_t old = *old_p;
++ uint32_t old = *old_addr;
+
+ asm volatile ("cs %[old],%[new],%[address]"
+ : [old] "+d" (old),
@@ tools/testing/selftests/kvm/s390x/memop.c: static void test_copy_key(void)
+ : [new] "d" ((uint32_t)new)
+ : "cc"
+ );
-+ ret = old == (uint32_t)*old_p;
-+ *old_p = old;
++ ret = old == (uint32_t)*old_addr;
++ *old_addr = old;
+ return ret;
+ }
+ case 8: {
-+ uint64_t old = *old_p;
++ uint64_t old = *old_addr;
+
+ asm volatile ("csg %[old],%[new],%[address]"
+ : [old] "+d" (old),
@@ tools/testing/selftests/kvm/s390x/memop.c: static void test_copy_key(void)
+ : [new] "d" ((uint64_t)new)
+ : "cc"
+ );
-+ ret = old == (uint64_t)*old_p;
-+ *old_p = old;
++ ret = old == (uint64_t)*old_addr;
++ *old_addr = old;
+ return ret;
+ }
+ case 16: {
-+ __uint128_t old = *old_p;
++ __uint128_t old = *old_addr;
+
+ asm volatile ("cdsg %[old],%[new],%[address]"
+ : [old] "+d" (old),
@@ tools/testing/selftests/kvm/s390x/memop.c: static void test_copy_key(void)
+ : [new] "d" (new)
+ : "cc"
+ );
-+ ret = old == *old_p;
-+ *old_p = old;
++ ret = old == *old_addr;
++ *old_addr = old;
+ return ret;
+ }
+ }
7: 8009dd0fb795 = 7: 90288760656e KVM: s390: selftest: memop: Add bad address test
8: cd1c47941014 = 8: e3d4b9b2ba61 KVM: s390: selftest: memop: Fix typo
9: 41b08e886566 = 9: 13fedd6e3d9e KVM: s390: selftest: memop: Fix wrong address being used in test

base-commit: 739ad2e4e15b585a0eaf98b7bdee62b2dd9588c9
--
2.34.1