From: Jason Gunthorpe <jgg@xxxxxxxxxxxx>
Presumably the intent here was that hmm_range_fault() could put the data
into some HW specific format and thus avoid some work. However, nothing
actually does that, and it isn't clear how anything actually could do that
as hmm_range_fault() provides CPU addresses which must be DMA mapped.
Perhaps there is some special HW that does not need DMA mapping, but we
don't have any examples of this, and the theoretical performance win of
avoiding an extra scan over the pfns array doesn't seem worth the
complexity. Plus pfns needs to be scanned anyhow to sort out any
DEVICE_PRIVATE pages.
This version replaces the uint64_t with an usigned long containing a pfn
and fixed flags. On input flags is filled with the HMM_PFN_REQ_* values,
on successful output it is filled with HMM_PFN_* values, describing the
state of the pages.
diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst
index 9924f2caa0184c..c9f2329113a47f 100644
--- a/Documentation/vm/hmm.rst
+++ b/Documentation/vm/hmm.rst
@@ -185,9 +185,6 @@ The usage pattern is::
range.start = ...;
range.end = ...;
range.pfns = ...;
- range.flags = ...;
- range.values = ...;
- range.pfn_shift = ...;
if (!mmget_not_zero(interval_sub->notifier.mm))
return -EFAULT;
@@ -229,15 +226,10 @@ The hmm_range struct has 2 fields, default_flags and pfn_flags_mask, that specif
fault or snapshot policy for the whole range instead of having to set them
for each entry in the pfns array.
-For instance, if the device flags for range.flags are::
+For instance if the device driver wants pages for a range with at least read
+permission, it sets::
- range.flags[HMM_PFN_VALID] = (1 << 63);
- range.flags[HMM_PFN_WRITE] = (1 << 62);
-
-and the device driver wants pages for a range with at least read permission,
-it sets::
-
- range->default_flags = (1 << 63);
+ range->default_flags = HMM_PFN_REQ_FAULT;
range->pfn_flags_mask = 0;
and calls hmm_range_fault() as described above. This will fill fault all pages
@@ -246,18 +238,18 @@ in the range with at least read permission.
Now let's say the driver wants to do the same except for one page in the range for
which it wants to have write permission. Now driver set::
- range->default_flags = (1 << 63);
- range->pfn_flags_mask = (1 << 62);
- range->pfns[index_of_write] = (1 << 62);
+ range->default_flags = HMM_PFN_REQ_FAULT;
+ range->pfn_flags_mask = HMM_PFN_REQ_WRITE;
+ range->pfns[index_of_write] = HMM_PFN_REQ_WRITE;
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
index cf0d9bd61bebf9..99697df28bfe12 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -518,9 +506,45 @@ static const struct mmu_interval_notifier_ops nouveau_svm_mni_ops = {
.invalidate = nouveau_svm_range_invalidate,
};
+static void nouveau_hmm_convert_pfn(struct nouveau_drm *drm,
+ struct hmm_range *range, u64 *ioctl_addr)
+{
+ unsigned long i, npages;
+
+ /*
+ * The ioctl_addr prepared here is passed through nvif_object_ioctl()
+ * to an eventual DMA map in something like gp100_vmm_pgt_pfn()
+ *
+ * This is all just encoding the internal hmm reprensetation into a
@@ -542,12 +564,15 @@ static int nouveau_range_fault(struct nouveau_svmm *svmm,
return -EBUSY;
range.notifier_seq = mmu_interval_read_begin(range.notifier);
- range.default_flags = 0;
- range.pfn_flags_mask = -1UL;
down_read(&mm->mmap_sem);
ret = hmm_range_fault(&range);
up_read(&mm->mmap_sem);
if (ret) {
+ /*
+ * FIXME: the input PFN_REQ flags are destroyed on
+ * -EBUSY, we need to regenerate them, also for the
+ * other continue below
+ */
if (ret == -EBUSY)
continue;
return ret;
@@ -562,7 +587,7 @@ static int nouveau_range_fault(struct nouveau_svmm *svmm,
break;
}
- nouveau_dmem_convert_pfn(drm, &range);
+ nouveau_hmm_convert_pfn(drm, &range, ioctl_addr);
svmm->vmm->vmm.object.client->super = true;
ret = nvif_object_ioctl(&svmm->vmm->vmm.object, data, size, NULL);
@@ -589,6 +614,7 @@ nouveau_svm_fault(struct nvif_notify *notify)
} i;
u64 phys[16];
} args;
+ unsigned long hmm_pfns[ARRAY_SIZE(args.phys)];
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -19,45 +19,45 @@
#include <linux/mmu_notifier.h>
/*
- * hmm_pfn_flag_e - HMM flag enums
+ * On output:
+ * 0 - The page is faultable and a future call with
+ * HMM_PFN_REQ_FAULT could succeed.
+ * HMM_PFN_VALID - the pfn field points to a valid PFN. This PFN is at
+ * least readable. If dev_private_owner is !NULL then this could
+ * point at a DEVICE_PRIVATE page.
+ * HMM_PFN_WRITE - if the page memory can be written to (requires HMM_PFN_VALID)
+ * HMM_PFN_ERROR - accessing the pfn is impossible and the device should
+ * fail. ie poisoned memory, special pages, no vma, etc
*
- * Flags:
- * HMM_PFN_VALID: pfn is valid. It has, at least, read permission.
- * HMM_PFN_WRITE: CPU page table has write permission set
- *
- * The driver provides a flags array for mapping page protections to device
- * PTE bits. If the driver valid bit for an entry is bit 3,
- * i.e., (entry & (1 << 3)), then the driver must provide
- * an array in hmm_range.flags with hmm_range.flags[HMM_PFN_VALID] == 1 << 3.
- * Same logic apply to all flags. This is the same idea as vm_page_prot in vma
- * except that this is per device driver rather than per architecture.
+ * On input:
+ * 0 - Return the current state of the page, do not fault it.
+ * HMM_PFN_REQ_FAULT - The output must have HMM_PFN_VALID or hmm_range_fault()
+ * will fail
+ * HMM_PFN_REQ_WRITE - The output must have HMM_PFN_WRITE or hmm_range_fault()
+ * will fail. Must be combined with HMM_PFN_REQ_FAULT.
*/
-enum hmm_pfn_flag_e {
- HMM_PFN_VALID = 0,
- HMM_PFN_WRITE,
- HMM_PFN_FLAG_MAX
+enum hmm_pfn_flags {
+ HMM_PFN_VALID = 1UL << (BITS_PER_LONG - 1),
+ HMM_PFN_WRITE = 1UL << (BITS_PER_LONG - 2),
+ HMM_PFN_ERROR = 1UL << (BITS_PER_LONG - 3),
+
@@ -174,44 +162,44 @@ static int hmm_vma_walk_hole(unsigned long addr, unsigned long end,
}
if (required_fault)
return hmm_vma_fault(addr, end, required_fault, walk);
- return hmm_pfns_fill(addr, end, range, HMM_PFN_NONE);
+ return hmm_pfns_fill(addr, end, range, 0);
}
-static inline uint64_t pmd_to_hmm_pfn_flags(struct hmm_range *range, pmd_t pmd)
+static inline unsigned long pmd_to_hmm_pfn_flags(struct hmm_range *range,
+ pmd_t pmd)
{
if (pmd_protnone(pmd))
return 0;
- return pmd_write(pmd) ? range->flags[HMM_PFN_VALID] |
- range->flags[HMM_PFN_WRITE] :
- range->flags[HMM_PFN_VALID];
+ return pmd_write(pmd) ? (HMM_PFN_VALID | HMM_PFN_WRITE) : HMM_PFN_VALID;