On Wed, 31 Jul 2019 18:41:16 -0400
Tony Krowiak <akrowiak@xxxxxxxxxxxxx> wrote:
Added two DBF log files for logging events and errors; one for the vfio_ap
driver, and one for each matrix mediated device.
While the s390dbf is useful (especially for accessing the information
in dumps), trace points are a more standard interface. Have you
evaluated that as well? (We probably should add something to the
vfio/mdev code as well; tracing there is a good complement to tracing
in vendor drivers.)
Also, isn't this independent of the rest of the series?
Signed-off-by: Tony Krowiak <akrowiak@xxxxxxxxxxxxx>
---
drivers/s390/crypto/vfio_ap_drv.c | 34 +++++++
drivers/s390/crypto/vfio_ap_ops.c | 187 ++++++++++++++++++++++++++++++----
drivers/s390/crypto/vfio_ap_private.h | 20 ++++
3 files changed, 223 insertions(+), 18 deletions(-)
diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c
index d8da520ae1fa..04a77246c22a 100644
--- a/drivers/s390/crypto/vfio_ap_drv.c
+++ b/drivers/s390/crypto/vfio_ap_drv.c
@@ -22,6 +22,10 @@ MODULE_AUTHOR("IBM Corporation");
MODULE_DESCRIPTION("VFIO AP device driver, Copyright IBM Corp. 2018");
MODULE_LICENSE("GPL v2");
+uint dbglvl = 3;
+module_param(dbglvl, uint, 0444);
+MODULE_PARM_DESC(dbglvl, "VFIO_AP driver debug level.");
More the default debug level, isn't it? (IIRC, you can change the level
of the s390dbfs dynamically.)
+
static struct ap_driver vfio_ap_drv;
struct ap_matrix_dev *matrix_dev;
@@ -158,6 +162,21 @@ static void vfio_ap_matrix_dev_destroy(void)
root_device_unregister(root_device);
}
+static void vfio_ap_log_queues_in_use(struct ap_matrix_mdev *matrix_mdev,
+ unsigned long *apm, unsigned long *aqm)
+{
+ unsigned long apid, apqi;
+
+ for_each_set_bit_inv(apid, apm, AP_DEVICES) {
+ for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) {
+ VFIO_AP_DBF(matrix_dev, DBF_ERR,
+ "queue %02lx.%04lx in use by mdev %s\n",
+ apid, apqi,
+ dev_name(mdev_dev(matrix_mdev->mdev)));
I remember some issues wrt %s in s390dbfs (lifetime); will this dbf
potentially outlive the mdev? Or is the string copied? (Or has s390dbf
been changed to avoid that trap? If so, please disregard my comments.)
+ }
+ }
+}
+
static bool vfio_ap_resource_in_use(unsigned long *apm, unsigned long *aqm)
{
bool in_use = false;
@@ -179,6 +198,8 @@ static bool vfio_ap_resource_in_use(unsigned long *apm, unsigned long *aqm)
continue;
in_use = true;
+ vfio_ap_log_queues_in_use(matrix_mdev, apm_intrsctn,
+ aqm_intrsctn);
}
mutex_unlock(&matrix_dev->lock);
@@ -186,6 +207,16 @@ static bool vfio_ap_resource_in_use(unsigned long *apm, unsigned long *aqm)
return in_use;
}
+static int __init vfio_ap_debug_init(void)
+{
+ matrix_dev->dbf = debug_register(VFIO_AP_DRV_NAME, 1, 1,
+ DBF_SPRINTF_MAX_ARGS * sizeof(long));
It seems that debug_register() can possibly fail? (Unlikely, but we
should check.)
+ debug_register_view(matrix_dev->dbf, &debug_sprintf_view);
+ debug_set_level(matrix_dev->dbf, dbglvl);
+
+ return 0;
+}
+
static int __init vfio_ap_init(void)
{
int ret;
@@ -219,6 +250,8 @@ static int __init vfio_ap_init(void)
return ret;
}
+ vfio_ap_debug_init();
+
return 0;
}
@@ -227,6 +260,7 @@ static void __exit vfio_ap_exit(void)
vfio_ap_mdev_unregister();
ap_driver_unregister(&vfio_ap_drv);
vfio_ap_matrix_dev_destroy();
+ debug_unregister(matrix_dev->dbf);
}
module_init(vfio_ap_init);
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 0e748819abb6..1aa18eba43d0 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -167,17 +167,23 @@ static struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q)
case AP_RESPONSE_INVALID_ADDRESS:
default:
/* All cases in default means AP not operational */
- WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
- status.response_code);
goto end_free;
}
} while (retries--);
- WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
- status.response_code);
end_free:
vfio_ap_free_aqic_resources(q);
q->matrix_mdev = NULL;
+ if (status.response_code) {
If I read the code correctly, we consider AP_RESPONSE_OTHERWISE_CHANGED
a success as well, don't we? (Not sure what that means, though.)
+ VFIO_AP_MDEV_DBF(q->matrix_mdev, DBF_WARN,
+ "IRQ disable failed for queue %02x.%04x: status response code=%u\n",
+ AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn),
+ status.response_code);
+ } else {
+ VFIO_AP_MDEV_DBF(q->matrix_mdev, DBF_INFO,
+ "IRQ disabled for queue %02x.%04x\n",
+ AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn));
+ }
return status;
}
(...)
@@ -321,8 +340,29 @@ static void vfio_ap_matrix_init(struct ap_config_info *info,
matrix->adm_max = info->apxa ? info->Nd : 15;
}
+static int vfio_ap_mdev_debug_init(struct ap_matrix_mdev *matrix_mdev)
+{
+ int ret;
+
+ matrix_mdev->dbf = debug_register(dev_name(mdev_dev(matrix_mdev->mdev)),
+ 1, 1,
+ DBF_SPRINTF_MAX_ARGS * sizeof(long));
+
+ if (!matrix_mdev->dbf)
+ return -ENOMEM;
Ok, here we do check for the result of debug_register().
+
+ ret = debug_register_view(matrix_mdev->dbf, &debug_sprintf_view);
+ if (ret)
+ return ret;
Don't we need to clean up ->dbf in the failure case?
Also, we probably need to check this as well for the other dbf.
+
+ debug_set_level(matrix_mdev->dbf, dbglvl);
+
+ return 0;
+}
+
static int vfio_ap_mdev_create(struct kobject *kobj, struct mdev_device *mdev)
{
+ int ret;
struct ap_matrix_mdev *matrix_mdev;
if ((atomic_dec_if_positive(&matrix_dev->available_instances) < 0))
@@ -335,6 +375,13 @@ static int vfio_ap_mdev_create(struct kobject *kobj, struct mdev_device *mdev)
}
matrix_mdev->mdev = mdev;
+
+ ret = vfio_ap_mdev_debug_init(matrix_mdev);
+ if (ret) {
+ kfree(matrix_mdev);
+ return ret;
You also should bump available_instances again in the failure case.
+ }
+
vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
mdev_set_drvdata(mdev, matrix_mdev);
matrix_mdev->pqap_hook.hook = handle_pqap;
@@ -350,14 +397,19 @@ static int vfio_ap_mdev_remove(struct mdev_device *mdev)
{
struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
- if (matrix_mdev->kvm)
+ if (matrix_mdev->kvm) {
+ VFIO_AP_MDEV_DBF(matrix_mdev, DBF_ERR,
+ "remove rejected, mdev in use by %s",
+ matrix_mdev->kvm->debugfs_dentry->d_iname);
Can this be a problem when the kvm goes away (and the d_iname is gone)?
Regardless of s390dbf implementation details, is d_iname even valid in
all cases (no debugfs)?
return -EBUSY;
+ }
mutex_lock(&matrix_dev->lock);
vfio_ap_mdev_reset_queues(mdev);
list_del(&matrix_mdev->node);
mutex_unlock(&matrix_dev->lock);
+ debug_unregister(matrix_mdev->dbf);
kfree(matrix_mdev);
mdev_set_drvdata(mdev, NULL);
atomic_inc(&matrix_dev->available_instances);
@@ -406,6 +458,22 @@ static struct attribute_group *vfio_ap_mdev_type_groups[] = {
NULL,
};
+static void vfio_ap_mdev_log_sharing_error(struct ap_matrix_mdev *logdev,
+ const char *assigned_to,
+ unsigned long *apm,
+ unsigned long *aqm)
+{
+ unsigned long apid, apqi;
+
+ for_each_set_bit_inv(apid, apm, AP_DEVICES) {
+ for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) {
+ VFIO_AP_MDEV_DBF(logdev, DBF_ERR,
+ "queue %02lx.%04lx already assigned to %s\n",
I'm also not 100% sure about string lifetimes here.
+ apid, apqi, assigned_to);
+ }
+ }
+}
+
/**
* vfio_ap_mdev_verify_no_sharing
*
@@ -448,22 +516,39 @@ static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev,
if (!bitmap_and(aqm, mdev_aqm, lstdev->matrix.aqm, AP_DOMAINS))
continue;
+ vfio_ap_mdev_log_sharing_error(matrix_mdev,
+ dev_name(mdev_dev(lstdev->mdev)),
+ apm, aqm);
+
return -EADDRINUSE;
}
return 0;
}
-static int vfio_ap_mdev_validate_masks(struct ap_matrix_mdev *matrix_mdev,
+static int vfio_ap_mdev_validate_masks(struct ap_matrix_mdev *logdev,
unsigned long *apm, unsigned long *aqm)
{
- int ret;
+ int ret = 0;
+ unsigned long apid, apqi;
+
+ for_each_set_bit_inv(apid, apm, AP_DEVICES) {
+ for_each_set_bit_inv(apqi, aqm, AP_DEVICES) {
+ if (!ap_owned_by_def_drv(apid, apqi))
+ continue;
+
+ VFIO_AP_MDEV_DBF(logdev, DBF_ERR,
+ "queue %02lx.%04lx owned by zcrypt\n",
s/zcrypt/default driver/ ?
+ apid, apqi);
+
+ ret = -EADDRNOTAVAIL;
+ }
+ }
- ret = ap_apqn_in_matrix_owned_by_def_drv(apm, aqm);
if (ret)
- return (ret == 1) ? -EADDRNOTAVAIL : ret;
+ return ret;
- return vfio_ap_mdev_verify_no_sharing(matrix_mdev, apm, aqm);
+ return vfio_ap_mdev_verify_no_sharing(logdev, apm, aqm);
}
static void vfio_ap_mdev_update_crycb(struct ap_matrix_mdev *matrix_mdev)
(...)
@@ -1013,9 +1132,10 @@ static void vfio_ap_mdev_wait_for_qempty(ap_qid_t qid)
msleep(20);
break;
default:
- pr_warn("%s: tapq response %02x waiting for queue %04x.%02x empty\n",
- __func__, status.response_code,
- AP_QID_CARD(qid), AP_QID_QUEUE(qid));
+ WARN_ONCE(1,
+ "%s: tapq response %02x waiting for queue %04x.%02x empty\n",
+ __func__, status.response_code,
+ AP_QID_CARD(qid), AP_QID_QUEUE(qid));
Why this change?
return;
}
} while (--retry);
(...)
diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
index 5cc3c2ebf151..f717e43e10cf 100644
--- a/drivers/s390/crypto/vfio_ap_private.h
+++ b/drivers/s390/crypto/vfio_ap_private.h
@@ -24,6 +24,21 @@
#define VFIO_AP_MODULE_NAME "vfio_ap"
#define VFIO_AP_DRV_NAME "vfio_ap"
+#define DBF_ERR 3 /* error conditions */
+#define DBF_WARN 4 /* warning conditions */
+#define DBF_INFO 5 /* informational */
+#define DBF_DEBUG 6 /* for debugging only */
Can you reuse the LOGLEVEL_* constants instead of rolling your own?
+
+#define DBF_SPRINTF_MAX_ARGS 5
+
+#define VFIO_AP_DBF(d_matrix_dev, ...) \
+ debug_sprintf_event(d_matrix_dev->dbf, ##__VA_ARGS__)
+
+#define VFIO_AP_MDEV_DBF(d_matrix_mdev, ...) \
+ debug_sprintf_event(d_matrix_mdev->dbf, ##__VA_ARGS__)
+
+extern uint dbglvl;
+
/**
* ap_matrix_dev - the AP matrix device structure
* @device: generic device structure associated with the AP matrix device
@@ -43,6 +58,7 @@ struct ap_matrix_dev {
struct list_head mdev_list;
struct mutex lock;
struct ap_driver *vfio_ap_drv;
+ debug_info_t *dbf;
};
extern struct ap_matrix_dev *matrix_dev;
@@ -77,6 +93,9 @@ struct ap_matrix {
* @group_notifier: notifier block used for specifying callback function for
* handling the VFIO_GROUP_NOTIFY_SET_KVM event
* @kvm: the struct holding guest's state
+ * @pqap_hook: handler for PQAP instruction
+ * @mdev: the matrix mediated device
Should updating the description for these two go into a trivial
separate patch?
+ * @dbf: the debug info log
*/
struct ap_matrix_mdev {
struct list_head node;
@@ -86,6 +105,7 @@ struct ap_matrix_mdev {
struct kvm *kvm;
struct kvm_s390_module_hook pqap_hook;
struct mdev_device *mdev;
+ debug_info_t *dbf;
};
extern int vfio_ap_mdev_register(void);