[PATCH RFC v3 39/43] KVM: guest_memfd: Apply content modes while setting memory attributes
From: Ackerley Tng
Date: Fri Mar 13 2026 - 02:27:57 EST
Provide defined memory content modes so that KVM can make guarantees about
memory content after setting memory attributes, according to userspace
requests.
Suggested-by: Sean Christopherson <seanjc@xxxxxxxxxx>
Signed-off-by: Ackerley Tng <ackerleytng@xxxxxxxxxx>
---
Documentation/virt/kvm/api.rst | 40 +++++++++++++++++++++++++++++
include/uapi/linux/kvm.h | 4 +++
virt/kvm/guest_memfd.c | 58 ++++++++++++++++++++++++++++++++++++++++--
3 files changed, 100 insertions(+), 2 deletions(-)
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 15148c80cfdb6..3ec92f8606099 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6571,6 +6571,8 @@ Errors:
EAGAIN Some page within requested range had unexpected refcounts. The
offset of the page will be returned in `error_offset`.
ENOMEM Ran out of memory trying to track private/shared state
+ EOPNOTSUPP There is no way for KVM to guarantee in-memory contents as
+ requested.
========== ===============================================================
KVM_SET_MEMORY_ATTRIBUTES2 is an extension to
@@ -6619,6 +6621,44 @@ on the shared pages, such as refcounts taken by get_user_pages(), and
try the ioctl again. A possible source of these long term refcounts is
if the guest_memfd memory was pinned in IOMMU page tables.
+By default, KVM makes no guarantees about the in-memory values after
+memory is converted to/from shared/private. Optionally, userspace may
+instruct KVM to ensure the contents of memory are zeroed or preserved,
+e.g. to enable in-place sharing of data, or as an optimization to
+avoid having to re-zero memory when userspace could have relied on the
+trusted entity to guarantee the memory will be zeroed as part of the
+entire conversion process.
+
+The content modes available are as follows:
+
+``KVM_SET_MEMORY_ATTRIBUTES2_ZERO``
+
+ On conversion, KVM guarantees all entities that have "allowed"
+ access to the memory will read zeros. E.g. on private to shared
+ conversion, both trusted and untrusted code will read zeros.
+
+ Zeroing is currently only guaranteed for private-to-shared
+ conversions, as KVM in general is untrusted and thus cannot
+ guarantee the guest (or any trusted entity) will read zeros after
+ conversion. Note, some CoCo implementations do zero memory contents
+ such that the guest reads zeros after conversion, and the guest may
+ choose to rely on that behavior. However, that's a contract between
+ the trusted CoCo entity and the guest, not between KVM and the
+ guest.
+
+``KVM_SET_MEMORY_ATTRIBUTES2_PRESERVE``
+
+ On conversion, KVM guarantees memory contents will be preserved with
+ respect to the last written unencrypted value. As a concrete
+ example, if the host writes ``0xbeef`` to shared memory and converts
+ the memory to private, the guest will also read ``0xbeef``, even if
+ the in-memory data is encrypted as part of the conversion. And vice
+ versa, if the guest writes ``0xbeef`` to private memory and then
+ converts the memory to shared, the host (and guest) will read
+ ``0xbeef`` (if the memory is accessible).
+
+TODO: Document CAP after CAP discussion.
+
See also: :ref: `KVM_SET_MEMORY_ATTRIBUTES`.
.. _kvm_run:
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 29baaa60de35a..0fc9ad4ea0d93 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1642,6 +1642,10 @@ struct kvm_memory_attributes {
/* Available with KVM_CAP_MEMORY_ATTRIBUTES2 */
#define KVM_SET_MEMORY_ATTRIBUTES2 _IOWR(KVMIO, 0xd2, struct kvm_memory_attributes2)
+#define KVM_SET_MEMORY_ATTRIBUTES2_MODE_UNSPECIFIED 0
+#define KVM_SET_MEMORY_ATTRIBUTES2_ZERO (1ULL << 0)
+#define KVM_SET_MEMORY_ATTRIBUTES2_PRESERVE (1ULL << 1)
+
struct kvm_memory_attributes2 {
union {
__u64 address;
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index f23acbca28e54..6bfcb2ed12c61 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -674,8 +674,50 @@ int __weak kvm_arch_gmem_apply_content_mode_preserve(struct kvm *kvm,
return -EOPNOTSUPP;
}
+static int kvm_gmem_apply_content_mode_folio(struct kvm *kvm,
+ struct folio *folio,
+ uint64_t content_mode)
+{
+ switch (content_mode) {
+ case KVM_SET_MEMORY_ATTRIBUTES2_MODE_UNSPECIFIED:
+ return kvm_arch_gmem_apply_content_mode_unspecified(kvm, folio);
+ case KVM_SET_MEMORY_ATTRIBUTES2_ZERO:
+ return kvm_arch_gmem_apply_content_mode_zero(kvm, folio);
+ case KVM_SET_MEMORY_ATTRIBUTES2_PRESERVE:
+ return kvm_arch_gmem_apply_content_mode_preserve(kvm, folio);
+ default:
+ WARN_ONCE(1, "Unexpected policy requested.");
+ return -EOPNOTSUPP;
+ }
+}
+
+static void kvm_gmem_apply_content_mode(struct inode *inode, pgoff_t start,
+ pgoff_t end, struct kvm *kvm,
+ uint64_t content_mode)
+{
+ struct address_space *mapping = inode->i_mapping;
+ struct folio_batch fbatch;
+ int i;
+
+ folio_batch_init(&fbatch);
+ while (filemap_get_folios(mapping, &start, end - 1, &fbatch)) {
+
+ for (i = 0; i < folio_batch_count(&fbatch); ++i) {
+ struct folio *folio = fbatch.folios[i];
+ int ret;
+
+ ret = kvm_gmem_apply_content_mode_folio(kvm, folio,
+ content_mode);
+ WARN_ON_ONCE(ret);
+ }
+
+ folio_batch_release(&fbatch);
+ }
+}
+
static int __kvm_gmem_set_attributes(struct inode *inode, pgoff_t start,
size_t nr_pages, uint64_t attrs,
+ struct kvm *kvm, uint64_t content_mode,
pgoff_t *err_index)
{
struct address_space *mapping = inode->i_mapping;
@@ -689,6 +731,12 @@ static int __kvm_gmem_set_attributes(struct inode *inode, pgoff_t start,
filemap_invalidate_lock(mapping);
+ if (content_mode &&
+ !(kvm_gmem_supported_content_modes(kvm) & content_mode)) {
+ r = -EOPNOTSUPP;
+ goto out;
+ }
+
mas_init(&mas, mt, start);
if (kvm_gmem_range_has_attributes(mt, start, nr_pages, attrs)) {
@@ -715,6 +763,8 @@ static int __kvm_gmem_set_attributes(struct inode *inode, pgoff_t start,
kvm_gmem_invalidate_begin(inode, start, end);
+ kvm_gmem_apply_content_mode(inode, start, end, kvm, content_mode);
+
mas_store_prealloc(&mas, xa_mk_value(attrs));
kvm_gmem_invalidate_end(inode, start, end);
@@ -736,7 +786,11 @@ static long kvm_gmem_set_attributes(struct file *file, void __user *argp)
if (copy_from_user(&attrs, argp, sizeof(attrs)))
return -EFAULT;
- if (attrs.flags)
+ if (attrs.flags & ~(KVM_SET_MEMORY_ATTRIBUTES2_ZERO |
+ KVM_SET_MEMORY_ATTRIBUTES2_PRESERVE))
+ return -EINVAL;
+ if ((attrs.flags & KVM_SET_MEMORY_ATTRIBUTES2_ZERO) &&
+ (attrs.flags & KVM_SET_MEMORY_ATTRIBUTES2_PRESERVE))
return -EINVAL;
if (attrs.error_offset)
return -EINVAL;
@@ -758,7 +812,7 @@ static long kvm_gmem_set_attributes(struct file *file, void __user *argp)
nr_pages = attrs.size >> PAGE_SHIFT;
index = attrs.offset >> PAGE_SHIFT;
r = __kvm_gmem_set_attributes(inode, index, nr_pages, attrs.attributes,
- &err_index);
+ f->kvm, attrs.flags, &err_index);
if (r) {
attrs.error_offset = err_index << PAGE_SHIFT;
--
2.53.0.851.ga537e3e6e9-goog