RE: [PATCH v2 2/7] iommu/vt-d: Multiple descriptors per qi_submit_sync()

From: Tian, Kevin
Date: Wed Apr 15 2020 - 04:18:49 EST


> From: Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx>
> Sent: Wednesday, April 15, 2020 1:26 PM
>
> Extend qi_submit_sync() function to support multiple descriptors.
>
> Signed-off-by: Jacob Pan <jacob.jun.pan@xxxxxxxxxxxxxxx>
> Signed-off-by: Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx>
> ---
> drivers/iommu/dmar.c | 39 +++++++++++++++++++++++--------------
> include/linux/intel-iommu.h | 1 +
> 2 files changed, 25 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
> index bb42177e2369..61d049e91f84 100644
> --- a/drivers/iommu/dmar.c
> +++ b/drivers/iommu/dmar.c
> @@ -1157,12 +1157,11 @@ static inline void reclaim_free_desc(struct
> q_inval *qi)
> }
> }
>
> -static int qi_check_fault(struct intel_iommu *iommu, int index)
> +static int qi_check_fault(struct intel_iommu *iommu, int index, int
> wait_index)
> {
> u32 fault;
> int head, tail;
> struct q_inval *qi = iommu->qi;
> - int wait_index = (index + 1) % QI_LENGTH;
> int shift = qi_shift(iommu);
>
> if (qi->desc_status[wait_index] == QI_ABORT)
> @@ -1234,12 +1233,12 @@ static int qi_check_fault(struct intel_iommu
> *iommu, int index)
> int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
> unsigned int count, unsigned long options)
> {
> - int rc;
> struct q_inval *qi = iommu->qi;
> - int offset, shift, length;
> struct qi_desc wait_desc;
> int wait_index, index;
> unsigned long flags;
> + int offset, shift;
> + int rc, i;
>
> if (!qi)
> return 0;
> @@ -1248,32 +1247,41 @@ int qi_submit_sync(struct intel_iommu *iommu,
> struct qi_desc *desc,
> rc = 0;
>
> raw_spin_lock_irqsave(&qi->q_lock, flags);
> - while (qi->free_cnt < 3) {
> + /*
> + * Check if we have enough empty slots in the queue to submit,
> + * the calculation is based on:
> + * # of desc + 1 wait desc + 1 space between head and tail
> + */
> + while (qi->free_cnt < count + 2) {
> raw_spin_unlock_irqrestore(&qi->q_lock, flags);
> cpu_relax();
> raw_spin_lock_irqsave(&qi->q_lock, flags);
> }
>
> index = qi->free_head;
> - wait_index = (index + 1) % QI_LENGTH;
> + wait_index = (index + count) % QI_LENGTH;
> shift = qi_shift(iommu);
> - length = 1 << shift;
>
> - qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
> + for (i = 0; i < count; i++) {
> + offset = ((index + i) % QI_LENGTH) << shift;
> + memcpy(qi->desc + offset, &desc[i], 1 << shift);
> + qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE;
> + }

what about doing one memcpy and leave the loop only for updating
qi status?

> + qi->desc_status[wait_index] = QI_IN_USE;
>
> - offset = index << shift;
> - memcpy(qi->desc + offset, desc, length);
> wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
> QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
> + if (options & QI_OPT_WAIT_DRAIN)
> + wait_desc.qw0 |= QI_IWD_PRQ_DRAIN;
> wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
> wait_desc.qw2 = 0;
> wait_desc.qw3 = 0;
>
> offset = wait_index << shift;
> - memcpy(qi->desc + offset, &wait_desc, length);
> + memcpy(qi->desc + offset, &wait_desc, 1 << shift);
>
> - qi->free_head = (qi->free_head + 2) % QI_LENGTH;
> - qi->free_cnt -= 2;
> + qi->free_head = (qi->free_head + count + 1) % QI_LENGTH;
> + qi->free_cnt -= count + 1;
>
> /*
> * update the HW tail register indicating the presence of
> @@ -1289,7 +1297,7 @@ int qi_submit_sync(struct intel_iommu *iommu,
> struct qi_desc *desc,
> * a deadlock where the interrupt context can wait
> indefinitely
> * for free slots in the queue.
> */
> - rc = qi_check_fault(iommu, index);
> + rc = qi_check_fault(iommu, index, wait_index);
> if (rc)
> break;
>
> @@ -1298,7 +1306,8 @@ int qi_submit_sync(struct intel_iommu *iommu,
> struct qi_desc *desc,
> raw_spin_lock(&qi->q_lock);
> }
>
> - qi->desc_status[index] = QI_DONE;
> + for (i = 0; i < count; i++)
> + qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE;
>
> reclaim_free_desc(qi);
> raw_spin_unlock_irqrestore(&qi->q_lock, flags);
> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
> index ee2d5cdd8339..cca1e5f9aeaa 100644
> --- a/include/linux/intel-iommu.h
> +++ b/include/linux/intel-iommu.h
> @@ -333,6 +333,7 @@ enum {
>
> #define QI_IWD_STATUS_DATA(d) (((u64)d) << 32)
> #define QI_IWD_STATUS_WRITE (((u64)1) << 5)
> +#define QI_IWD_PRQ_DRAIN (((u64)1) << 7)
>
> #define QI_IOTLB_DID(did) (((u64)did) << 16)
> #define QI_IOTLB_DR(dr) (((u64)dr) << 7)
> --
> 2.17.1