[PATCH 25/36] HMM: add helpers for migration back to system memory.
From: jglisse
Date: Thu May 21 2015 - 16:24:29 EST
From: Jérôme Glisse <jglisse@xxxxxxxxxx>
This patch adds all the necessary functions and helpers for migration
from device memory back to system memory. There are 3 different
cases that would use this code:
- CPU page fault
- fork
- device driver request
Note that this patch uses regular memory accounting, which means that
migration can fail as a result of memory cgroup resource exhaustion.
Later patches will modify memcg to allow remote memory to be kept
accounted as regular memory, thus removing this point of failure.
Signed-off-by: Jérôme Glisse <jglisse@xxxxxxxxxx>
Signed-off-by: Sherry Cheung <SCheung@xxxxxxxxxx>
Signed-off-by: Subhash Gutti <sgutti@xxxxxxxxxx>
Signed-off-by: Mark Hairgrove <mhairgrove@xxxxxxxxxx>
Signed-off-by: John Hubbard <jhubbard@xxxxxxxxxx>
Signed-off-by: Jatin Kumar <jakumar@xxxxxxxxxx>
---
mm/hmm.c | 157 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 157 insertions(+)
diff --git a/mm/hmm.c b/mm/hmm.c
index b8807b2..1208f64 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -50,6 +50,12 @@ static struct mmu_notifier_ops hmm_notifier_ops;
static inline struct hmm_mirror *hmm_mirror_ref(struct hmm_mirror *mirror);
static inline void hmm_mirror_unref(struct hmm_mirror **mirror);
static void hmm_mirror_kill(struct hmm_mirror *mirror);
+/*
+ * Forward declaration: hmm_migrate_back() calls this helper before its
+ * definition, which comes later in this file.
+ */
+static int hmm_mirror_migrate_back(struct hmm_mirror *mirror,
+ struct hmm_event *event,
+ pte_t *new_pte,
+ dma_addr_t *dst,
+ unsigned long start,
+ unsigned long end);
static inline int hmm_mirror_update(struct hmm_mirror *mirror,
struct hmm_event *event,
struct page *page);
@@ -425,6 +431,46 @@ static struct mmu_notifier_ops hmm_notifier_ops = {
};
+/* hmm_migrate_back() - migrate a range back to system memory for all mirrors.
+ *
+ * @hmm: The hmm struct whose list of mirrors is walked.
+ * @event: The event handed to each mirror's migrate back helper.
+ * @mm: The mm struct, passed to mm_hmm_migrate_back() and its cleanup.
+ * @vma: The vma, passed to mm_hmm_migrate_back() and its cleanup.
+ * @new_pte: Array of CPU page table entries for the range.
+ * @dst: Array of HMM page table entries used by the mirrors for the range.
+ * @start: Start address of the range (inclusive).
+ * @end: End address of the range (exclusive).
+ * Returns: 0 on success, otherwise the first error reported either by
+ *          mm_hmm_migrate_back() or by one of the mirrors.
+ *
+ * A mirror that fails to migrate is killed, and the walk over the mirror
+ * list is then restarted from the beginning.
+ */
+static int hmm_migrate_back(struct hmm *hmm,
+			    struct hmm_event *event,
+			    struct mm_struct *mm,
+			    struct vm_area_struct *vma,
+			    pte_t *new_pte,
+			    dma_addr_t *dst,
+			    unsigned long start,
+			    unsigned long end)
+{
+	struct hmm_mirror *mirror;
+	int first_err, err;
+
+	/*
+	 * A failure here is remembered but does not stop the migration, as
+	 * some pages of the range might still be migratable.
+	 */
+	first_err = mm_hmm_migrate_back(mm, vma, new_pte, start, end);
+
+retry:
+	down_read(&hmm->rwsem);
+	hlist_for_each_entry(mirror, &hmm->mirrors, mlist) {
+		err = hmm_mirror_migrate_back(mirror, event, new_pte,
+					      dst, start, end);
+		if (!err)
+			continue;
+
+		/* Only the first error encountered is reported. */
+		if (!first_err)
+			first_err = err;
+
+		/*
+		 * Grab a reference before dropping the read lock, kill the
+		 * failing mirror, release the reference and walk the list
+		 * of mirrors again from the start.
+		 */
+		mirror = hmm_mirror_ref(mirror);
+		BUG_ON(!mirror);
+		up_read(&hmm->rwsem);
+		hmm_mirror_kill(mirror);
+		hmm_mirror_unref(&mirror);
+		goto retry;
+	}
+	up_read(&hmm->rwsem);
+
+	mm_hmm_migrate_back_cleanup(mm, vma, new_pte, dst, start, end);
+
+	return first_err;
+}
+
int hmm_handle_cpu_fault(struct mm_struct *mm,
struct vm_area_struct *vma,
pmd_t *pmdp, unsigned long addr,
@@ -1085,6 +1131,117 @@ out:
}
EXPORT_SYMBOL(hmm_mirror_fault);
+/* hmm_mirror_migrate_back() - migrate a range of one mirror to system memory.
+ *
+ * @mirror: The mirror struct.
+ * @event: The event handed to the device copy_from_device() callback.
+ * @new_pte: Array of CPU page table entries, one per page of the range; only
+ *           present entries are considered for migration.
+ * @dst: Array of HMM page table entries, one per page of the range, with
+ *       dst[0] corresponding to @start. It is filled by this function.
+ * @start: Start address of the range (inclusive).
+ * @end: End address of the range (exclusive).
+ * Returns: 0 on success, otherwise the DMA mapping error if there was one,
+ *          else the error returned by copy_from_device().
+ *
+ * The function works in three steps. First, for each page that has a present
+ * CPU pte and a valid device entry in the mirror page table, a selected and
+ * writable destination entry is built from the CPU pte pfn. Second, the
+ * destination entries are DMA mapped (when the device has a struct device)
+ * and the device driver is asked to copy the data back. Third, the mirror
+ * page table is updated with the entries that were migrated, device entries
+ * that failed are cleared, and failed entries are DMA unmapped.
+ */
+static int hmm_mirror_migrate_back(struct hmm_mirror *mirror,
+				   struct hmm_event *event,
+				   pte_t *new_pte,
+				   dma_addr_t *dst,
+				   unsigned long start,
+				   unsigned long end)
+{
+	unsigned long addr, i, npages = (end - start) >> PAGE_SHIFT;
+	struct hmm_device *device = mirror->device;
+	struct device *dev = mirror->device->dev;
+	struct hmm_pt_iter iter;
+	int r, ret = 0;
+
+	hmm_pt_iter_init(&iter);
+	for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, ++i) {
+		dma_addr_t *hmm_pte;
+
+		/* Start from a clean state, dst may hold stale select bit. */
+		hmm_pte_clear_select(&dst[i]);
+
+		if (!pte_present(new_pte[i]))
+			continue;
+		hmm_pte = hmm_pt_iter_update(&iter, &mirror->pt, addr);
+		if (!hmm_pte)
+			continue;
+
+		/* Only pages this mirror has in device memory need a copy. */
+		if (!hmm_pte_test_valid_dev(hmm_pte))
+			continue;
+
+		dst[i] = hmm_pte_from_pfn(pte_pfn(new_pte[i]));
+		hmm_pte_set_select(&dst[i]);
+		hmm_pte_set_write(&dst[i]);
+	}
+
+	if (dev) {
+		ret = hmm_mirror_dma_map_range(mirror, dst, NULL, npages);
+		if (ret) {
+			/* Drop selected entries that could not be mapped. */
+			for (i = 0; i < npages; ++i) {
+				if (!hmm_pte_test_select(&dst[i]))
+					continue;
+				if (hmm_pte_test_valid_dma(&dst[i]))
+					continue;
+				dst[i] = 0;
+			}
+		}
+	}
+
+	r = device->ops->copy_from_device(mirror, event, dst, start, end);
+
+	/* Update mirror page table with successfully migrated entry. */
+	for (addr = start; addr < end;) {
+		unsigned long idx, next, dir_npages;
+		dma_addr_t *hmm_pte;
+
+		hmm_pte = hmm_pt_iter_update(&iter, &mirror->pt, addr);
+		if (!hmm_pte) {
+			addr = hmm_pt_iter_next(&iter, &mirror->pt,
+						addr, end);
+			continue;
+		}
+
+		next = hmm_pt_level_next(&mirror->pt, addr, end,
+					 mirror->pt.llevel - 1);
+
+		/*
+		 * dst[] is indexed relative to start, exactly like when it
+		 * was filled above and handed to copy_from_device().
+		 */
+		idx = (addr - start) >> PAGE_SHIFT;
+		dir_npages = (next - addr) >> PAGE_SHIFT;
+		hmm_pt_iter_directory_lock(&iter, &mirror->pt);
+		for (i = 0; i < dir_npages; i++, idx++) {
+			if (!hmm_pte_test_valid_pfn(&dst[idx]) &&
+			    !hmm_pte_test_valid_dma(&dst[idx])) {
+				/* Not migrated, forget the device entry. */
+				if (hmm_pte_test_valid_dev(&hmm_pte[i])) {
+					hmm_pte[i] = 0;
+					hmm_pt_iter_directory_unref(&iter,
+							mirror->pt.llevel);
+				}
+				continue;
+			}
+
+			VM_BUG_ON(!hmm_pte_test_select(&dst[idx]));
+			VM_BUG_ON(!hmm_pte_test_valid_dev(&hmm_pte[i]));
+			hmm_pte[i] = dst[idx];
+		}
+		hmm_pt_iter_directory_unlock(&iter, &mirror->pt);
+
+		/* DMA unmap the entries that failed to migrate. */
+		if (dev) {
+			idx = (addr - start) >> PAGE_SHIFT;
+			for (i = 0; i < dir_npages; i++, idx++) {
+				dma_addr_t dma_addr;
+
+				/*
+				 * A failed entry has its valid bit clear but
+				 * its select bit still set. Skip everything
+				 * else: unselected entries were never mapped
+				 * and valid entries are now in use by the
+				 * mirror page table.
+				 */
+				if (!hmm_pte_test_select(&dst[idx]) ||
+				    hmm_pte_test_valid_dma(&dst[idx]))
+					continue;
+
+				/*
+				 * Restore the valid bit so that the DMA
+				 * address can be read back from the failed
+				 * destination entry itself.
+				 */
+				hmm_pte_set_valid_dma(&dst[idx]);
+				dma_addr = hmm_pte_dma_addr(dst[idx]);
+				dma_unmap_page(dev, dma_addr, PAGE_SIZE,
+					       DMA_BIDIRECTIONAL);
+			}
+		}
+
+		addr = next;
+	}
+	hmm_pt_iter_fini(&iter, &mirror->pt);
+
+	/* A DMA mapping error takes precedence over a device copy error. */
+	return ret ? ret : r;
+}
+
/* hmm_mirror_range_discard() - discard a range of address.
*
* @mirror: The mirror struct.
--
1.9.3
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/