Re: [PATCH v3 2/5] iommu/amd: Introduce helper functions to access and update 256-bit DTE

From: Suthikulpanit, Suravee
Date: Mon Sep 16 2024 - 12:12:39 EST




On 9/7/2024 12:00 AM, Jason Gunthorpe wrote:
On Fri, Sep 06, 2024 at 12:13:05PM +0000, Suravee Suthikulpanit wrote:

+static void update_dte256(struct amd_iommu *iommu, struct iommu_dev_data *dev_data,
+ struct dev_table_entry *new)
+{
+ struct dev_table_entry *dev_table = get_dev_table(iommu);
+ struct dev_table_entry *ptr = &dev_table[dev_data->devid];
+ struct dev_table_entry old;
+ u128 tmp;
+
+ lockdep_assert_held(&dev_data->dte_lock);
+
+ old.data128[0] = ptr->data128[0];
+ old.data128[1] = ptr->data128[1];
+
+ tmp = cmpxchg128(&ptr->data128[1], old.data128[1], new->data128[1]);
+ if (tmp == old.data128[1]) {
+ if (!try_cmpxchg128(&ptr->data128[0], &old.data128[0], new->data128[0])) {
+ /* Restore hi 128-bit */
+ cmpxchg128(&ptr->data128[1], new->data128[1], tmp);

Like I said before, you can't fix this. Just go fowards. Keeping an
old DTE will UAF things, that is much worse than just forcing a new
DTE and loosing some interrupt settings.

@@ -243,13 +285,28 @@ static int clone_alias(struct pci_dev *pdev, u16 alias, void *data)
if (!iommu)
return 0;
- amd_iommu_set_rlookup_table(iommu, alias);
- dev_table = get_dev_table(iommu);
- memcpy(dev_table[alias].data,
- dev_table[devid].data,
- sizeof(dev_table[alias].data));
+ /* Get DTE for pdev */
+ dev_data = dev_iommu_priv_get(&pdev->dev);
+ if (!dev_data)
+ return -EINVAL;
- return 0;
+ spin_lock(&dev_data->dte_lock);
+ get_dte256(iommu, dev_data, &dte);
+ spin_unlock(&dev_data->dte_lock);

You can't unlock after reading the DTE and the relock it to program
it. The interrupt data can have changed while unlocked.

Put the lock inside update_dte256() and force the interrupt bits
under the lock.

Something like this is what I'm expecting:

static void write_upper(struct dev_table_entry *ptr, struct dev_table_entry *new)
{
struct dev_table_entry old;

lockdep_assert_held(&dev_data->dte_lock);

do {
old->data128[1] = ptr->data128[1];
new->data64[2] &= ~INTR_MASK;
new->data64[2] |= old->data64[2] & INTR_MASK;
} while (!try_cmpxchg128(&ptr->data128[1], &old.data128[1],
new->data128[1]));
}

static void write_lower(struct dev_table_entry *ptr, struct dev_table_entry *new)
{
struct dev_table_entry old;

do {
old->data128[0] = ptr->data128[0];
} while (!try_cmpxchg128(&ptr->data128[0], &old.data128[0],
new->data128[0]));
}

static void update_dte256(struct amd_iommu *iommu,
struct iommu_dev_data *dev_data,
struct dev_table_entry *new)
{

spin_lock(&dev_data->dte_lock);
if (!(ptr->data64[0] & V)) {
write_upper(ptr, new);
/* NO FLUSH assume V=0 never cached */
write_lower(ptr, new);
/* FLUSH */
} else if (!(new->data64[0] & V) {
write_lower(ptr, new);
/* FLUSH */
write_upper(ptr, new);
/* NO FLUSH assume V=0 never cached */
} else {
/* both are valid */
if (FIELD_GET(ptr->data[2], GUEST_PAGING_MODE) ==
FIELD_GET(new->data[2], GUEST_PAGING_MODE)) {
/* Upper doesn't change */
write_upper(ptr, new);
write_lower(ptr, new);
/* FLUSH */
else if (old has no guest page table) {
write_upper(ptr, new);
/* FLUSH */
write_lower(ptr, new);
/* FLUSH */
else if (new has no guest page table) {
write_lower(ptr, new);
/* FLUSH */
write_upper(ptr, new);
/* FLUSH */
} else {
struct dev_table_entry clear = {};

write_lower(ptr, &clear);
/* FLUSH to set V=0 */
write_upper(ptr, new);
/* NO FLUSH assume V=0 never cached */
write_lower(ptr, new);
/* FLUSH */
}
}

spin_unlock(&dev_data->dte_lock);
}

Got your point. I will update based on your suggestion here.

And it probably needs more logic to accomodate the VIOMMU valid bits
in the 2nd 128.

I'll take care of this when enable the HW-vIOMMU stuff.

Thanks,
Suravee