Re: [PATCH] s390/vfio-ap: do not use open locks during VFIO_GROUP_NOTIFY_SET_KVM notification

From: Jason J. Herne
Date: Tue Jun 29 2021 - 09:22:40 EST


On 6/25/21 6:07 PM, Tony Krowiak wrote:
The fix to resolve a lockdep splat while handling the
VFIO_GROUP_NOTIFY_SET_KVM event introduced a kvm_busy flag indicating that
the vfio_ap device driver is busy setting or unsetting the KVM pointer.
A wait queue was employed to allow functions requiring access to the KVM
pointer to wait for the kvm_busy flag to be cleared. For the duration of
the wait period, the mdev lock was unlocked then acquired again after the
kvm_busy flag was cleared. This got rid of the lockdep report, but didn't
really resolve the problem.

This patch removes the kvm_busy flag and wait queue as they are not
necessary to resolve the lockdep splat problem. The wait queue was
introduced to prevent changes to the matrix used to update the guest's
AP configuration. The idea was that whenever an adapter, domain or control
domain was being assigned to or unassigned from the matrix, the function
would wait until the group notifier function was no longer busy with the
KVM pointer.

The thing is, the KVM pointer value (matrix_mdev->kvm) is always set and
cleared while holding the matrix_dev->lock mutex. The assignment and
unassignment interfaces also lock the matrix_dev->lock mutex prior to
checking whether the matrix_mdev->kvm pointer is set and, if so, return
the -EBUSY error from the function. Consequently, there is no chance for
an update to the matrix to occur while the guest's AP configuration is
being updated.

Fixes: 0cc00c8d4050 ("s390/vfio-ap: fix circular lockdep when setting/clearing crypto masks")
Cc: stable@xxxxxxxxxxxxxxx
Signed-off-by: Tony Krowiak <akrowiak@xxxxxxxxxxxxx>
---
drivers/s390/crypto/vfio_ap_ops.c | 77 +++++++--------------------
drivers/s390/crypto/vfio_ap_private.h | 2 -
2 files changed, 20 insertions(+), 59 deletions(-)

diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 742277bc8d1c..99a58f54c076 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -294,15 +294,6 @@ static int handle_pqap(struct kvm_vcpu *vcpu)
matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook,
struct ap_matrix_mdev, pqap_hook);
- /*
- * If the KVM pointer is in the process of being set, wait until the
- * process has completed.
- */
- wait_event_cmd(matrix_mdev->wait_for_kvm,
- !matrix_mdev->kvm_busy,
- mutex_unlock(&matrix_dev->lock),
- mutex_lock(&matrix_dev->lock));
-
/* If the there is no guest using the mdev, there is nothing to do */
if (!matrix_mdev->kvm)
goto out_unlock;
@@ -350,7 +341,6 @@ static int vfio_ap_mdev_create(struct mdev_device *mdev)
matrix_mdev->mdev = mdev;
vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
- init_waitqueue_head(&matrix_mdev->wait_for_kvm);
mdev_set_drvdata(mdev, matrix_mdev);
matrix_mdev->pqap_hook = handle_pqap;
mutex_lock(&matrix_dev->lock);
@@ -623,7 +613,7 @@ static ssize_t assign_adapter_store(struct device *dev,
* If the KVM pointer is in flux or the guest is running, disallow
* un-assignment of adapter
*/
- if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
+ if (matrix_mdev->kvm) {
ret = -EBUSY;
goto done;
}
@@ -696,7 +686,7 @@ static ssize_t unassign_adapter_store(struct device *dev,
* If the KVM pointer is in flux or the guest is running, disallow
* un-assignment of adapter
*/
- if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
+ if (matrix_mdev->kvm) {
ret = -EBUSY;
goto done;
}
@@ -786,7 +776,7 @@ static ssize_t assign_domain_store(struct device *dev,
* If the KVM pointer is in flux or the guest is running, disallow
* assignment of domain
*/
- if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
+ if (matrix_mdev->kvm) {
ret = -EBUSY;
goto done;
}
@@ -854,7 +844,7 @@ static ssize_t unassign_domain_store(struct device *dev,
* If the KVM pointer is in flux or the guest is running, disallow
* un-assignment of domain
*/
- if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
+ if (matrix_mdev->kvm) {
ret = -EBUSY;
goto done;
}
@@ -908,7 +898,7 @@ static ssize_t assign_control_domain_store(struct device *dev,
* If the KVM pointer is in flux or the guest is running, disallow
* assignment of control domain.
*/
- if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
+ if (matrix_mdev->kvm) {
ret = -EBUSY;
goto done;
}
@@ -967,7 +957,7 @@ static ssize_t unassign_control_domain_store(struct device *dev,
* If the KVM pointer is in flux or the guest is running, disallow
* un-assignment of control domain.
*/
- if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
+ if (matrix_mdev->kvm) {
ret = -EBUSY;
goto done;
}
@@ -1108,14 +1098,17 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
struct ap_matrix_mdev *m;
if (kvm->arch.crypto.crycbd) {
+ mutex_lock(&matrix_dev->lock);
+
list_for_each_entry(m, &matrix_dev->mdev_list, node) {
- if (m != matrix_mdev && m->kvm == kvm)
+ if (m != matrix_mdev && m->kvm == kvm) {
+ mutex_unlock(&matrix_dev->lock);
return -EPERM;
+ }
}
kvm_get_kvm(kvm);
matrix_mdev->kvm = kvm;
- matrix_mdev->kvm_busy = true;
mutex_unlock(&matrix_dev->lock);
down_write(&matrix_mdev->kvm->arch.crypto.pqap_hook_rwsem);
@@ -1126,10 +1119,6 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
matrix_mdev->matrix.apm,
matrix_mdev->matrix.aqm,
matrix_mdev->matrix.adm);
-
- mutex_lock(&matrix_dev->lock);
- matrix_mdev->kvm_busy = false;
- wake_up_all(&matrix_mdev->wait_for_kvm);
}
return 0;
@@ -1181,33 +1170,21 @@ static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
*/
static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
{
- /*
- * If the KVM pointer is in the process of being set, wait until the
- * process has completed.
- */
- wait_event_cmd(matrix_mdev->wait_for_kvm,
- !matrix_mdev->kvm_busy,
- mutex_unlock(&matrix_dev->lock),
- mutex_lock(&matrix_dev->lock));
+ mutex_lock(&matrix_dev->lock);
- if (matrix_mdev->kvm) {
- matrix_mdev->kvm_busy = true;
+ if ((matrix_mdev->kvm) && (matrix_mdev->kvm->arch.crypto.crycbd)) {
mutex_unlock(&matrix_dev->lock);


If this function cannot be entered concurrently on separate threads, then I think we can remove this mutex_unlock of matrix_dev->lock (and the mutex_lock above). All that happens while holding the lock is the examination of the matrix_mdev->kvm pointer and then the subsequent examination of matrix_mdev->kvm->arch.crypto.crycbd.
And since this function is the only place that the kvm pointer is NULLed, I don't see how the kvm pointer could go away between the two parts of the conditional. Again, this is only true if this function cannot be entered concurrently.


--
-- Jason J. Herne (jjherne@xxxxxxxxxxxxx)