[PATCH v7 9/9] x86/mmu: Allocate/free PASID

From: Fenghua Yu
Date: Thu Aug 27 2020 - 11:08:06 EST


A PASID is allocated for an "mm" the first time any thread binds
to an SVM capable device and is freed from the "mm" when the SVM is
unbound by the last thread. It's possible for the "mm" to have different
PASID values in different binding/unbinding SVM cycles.

The mm's PASID (non-zero for valid PASID or 0 for invalid PASID) is
propagated to per-thread PASID MSR for all threads within the mm through
through IPI, context switch, or inherit to ensure a running thread has
the right PASID MSR matching the mm's PASID.

Suggested-by: Andy Lutomirski <luto@xxxxxxxxxx>
Signed-off-by: Fenghua Yu <fenghua.yu@xxxxxxxxx>
Reviewed-by: Tony Luck <tony.luck@xxxxxxxxx>
---
v7:
- Don't fix up PASID in #GP. Instead, update the PASID MSR by IPI and
context switch after PASID allocation and free. Inherit PASID from
parent. (Andy)

Before v7:
- Allocate a PASID for the mm and free it until mm exit.

arch/x86/include/asm/disabled-features.h | 2 +-
arch/x86/include/asm/fpu/api.h | 12 +++++
arch/x86/include/asm/fpu/internal.h | 2 +
arch/x86/kernel/fpu/xstate.c | 56 ++++++++++++++++++++++++
drivers/iommu/intel/svm.c | 28 +++++++++++-
5 files changed, 98 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 588d83e9da49..5861d34f9771 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -56,7 +56,7 @@
# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
#endif

-#ifdef CONFIG_INTEL_IOMMU_SVM
+#ifdef CONFIG_IOMMU_SUPPORT
# define DISABLE_ENQCMD 0
#else
# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index b774c52e5411..dcd9503b1098 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -62,4 +62,16 @@ extern void switch_fpu_return(void);
*/
extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name);

+/*
+ * Tasks that are not using SVA have mm->pasid set to zero to note that they
+ * will not have the valid bit set in MSR_IA32_PASID while they are running.
+ */
+#define PASID_DISABLED 0
+
+#ifdef CONFIG_IOMMU_SUPPORT
+/* Update current's PASID MSR/state by mm's PASID. */
+void update_pasid(void);
+#else
+static inline void update_pasid(void) { }
+#endif
#endif /* _ASM_X86_FPU_API_H */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 0a460f2a3f90..2d737e02b59a 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -583,6 +583,8 @@ static inline void switch_fpu_finish(struct fpu *new_fpu)
pkru_val = pk->pkru;
}
__write_pkru(pkru_val);
+
+ update_pasid();
}

/*
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 67f1a03b9b23..556040e14f1c 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1402,3 +1402,59 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
return 0;
}
#endif /* CONFIG_PROC_PID_ARCH_STATUS */
+
+#ifdef CONFIG_IOMMU_SUPPORT
+void update_pasid(void)
+{
+ u64 pasid_state;
+ u32 pasid;
+
+ if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
+ return;
+
+ if (!current->mm)
+ return;
+
+ pasid = READ_ONCE(current->mm->pasid);
+ /* Set the valid bit in the PASID MSR/state only for valid pasid. */
+ pasid_state = pasid == PASID_DISABLED ?
+ pasid : pasid | MSR_IA32_PASID_VALID;
+
+ /*
+ * No need to hold fregs_lock() since the task's fpstate won't
+ * be changed by others (e.g. ptrace) while the task is being
+ * switched to or is in IPI.
+ */
+ if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
+ /* The MSR is active and can be directly updated. */
+ wrmsrl(MSR_IA32_PASID, pasid_state);
+ } else {
+ struct fpu *fpu = &current->thread.fpu;
+ struct ia32_pasid_state *ppasid_state;
+ struct xregs_state *xsave;
+
+ /*
+ * The CPU's xstate registers are not currently active. Just
+ * update the PASID state in the memory buffer here. The
+ * PASID MSR will be loaded when returning to user mode.
+ */
+ xsave = &fpu->state.xsave;
+ xsave->header.xfeatures |= XFEATURE_MASK_PASID;
+ ppasid_state = get_xsave_addr(xsave, XFEATURE_PASID);
+ if (ppasid_state) {
+ /*
+ * Only update the task's PASID state when it's
+ * different from the mm's pasid.
+ */
+ if (ppasid_state->pasid != pasid_state) {
+ /*
+ * Invalid fpregs so that xrstors will pick up
+ * the PASID state.
+ */
+ __fpu_invalidate_fpregs_state(fpu);
+ ppasid_state->pasid = pasid_state;
+ }
+ }
+ }
+}
+#endif /* CONFIG_IOMMU_SUPPORT */
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index fc90a079e228..60ffe083b6d6 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -19,6 +19,7 @@
#include <linux/mm_types.h>
#include <linux/ioasid.h>
#include <asm/page.h>
+#include <asm/fpu/api.h>

#include "pasid.h"

@@ -444,6 +445,24 @@ int intel_svm_unbind_gpasid(struct device *dev, u32 pasid)
return ret;
}

+static void _load_pasid(void *unused)
+{
+ update_pasid();
+}
+
+static void load_pasid(struct mm_struct *mm, u32 pasid)
+{
+ mutex_lock(&mm->context.lock);
+
+ /* Synchronize with READ_ONCE in update_pasid(). */
+ smp_store_release(&mm->pasid, pasid);
+
+ /* Update PASID MSR on all CPUs running the mm's tasks. */
+ on_each_cpu_mask(mm_cpumask(mm), _load_pasid, NULL, true);
+
+ mutex_unlock(&mm->context.lock);
+}
+
/* Caller must hold pasid_mutex, mm reference */
static int
intel_svm_bind_mm(struct device *dev, unsigned int flags,
@@ -591,6 +610,10 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
}

list_add_tail(&svm->list, &global_svm_list);
+ if (mm) {
+ /* The newly allocated pasid is loaded to the mm. */
+ load_pasid(mm, svm->pasid);
+ }
} else {
/*
* Binding a new device with existing PASID, need to setup
@@ -654,8 +677,11 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)

if (list_empty(&svm->devs)) {
ioasid_free(svm->pasid);
- if (svm->mm)
+ if (svm->mm) {
mmu_notifier_unregister(&svm->notifier, svm->mm);
+ /* Clear mm's pasid. */
+ load_pasid(svm->mm, PASID_DISABLED);
+ }
list_del(&svm->list);
/* We mandate that no page faults may be outstanding
* for the PASID when intel_svm_unbind_mm() is called.
--
2.19.1