Re: [RFC PATCH 0/3] Add mmap(MAP_CONTIG) support

From: Anshuman Khandual
Date: Thu Oct 12 2017 - 10:26:15 EST


On 10/12/2017 04:06 PM, Anshuman Khandual wrote:
> On 10/12/2017 07:16 AM, Mike Kravetz wrote:
>> The following is a 'possible' way to add such functionality. I just
>> did what was easy and pre-allocated contiguous pages which are used
>> to populate the mapping. I did not use any of the higher order
>> allocators such as alloc_contig_range. Therefore, it is limited to
> Just tried with a small prototype with an implementation similar to that
> of alloc_gigantic_page() where we scan the zones (applicable zonelist)
> for contiguous valid PFN range and try allocating with alloc_contig_range.
> Will share it soon.
>

With this patch on top of the series, we can allocate a little more than
twice 1UL << (MAX_ORDER - 1) pages on POWER. But the problem is that the
amount keeps reducing on every attempt until it reaches
1UL << (MAX_ORDER - 1). Will look into it.

diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h
index 03c06ba..ce13b36 100644
--- a/arch/powerpc/include/uapi/asm/mman.h
+++ b/arch/powerpc/include/uapi/asm/mman.h
@@ -28,5 +28,6 @@
#define MAP_NONBLOCK 0x10000 /* do not block on IO */
#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */
#define MAP_HUGETLB 0x40000 /* create a huge page mapping */
+#define MAP_CONTIG 0x80000 /* back with contiguous pages */

#endif /* _UAPI_ASM_POWERPC_MMAN_H */
diff --git a/mm/mmap.c b/mm/mmap.c
index aee7917..4e6588d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1568,6 +1568,60 @@ struct mmap_arg_struct {
}
#endif /* __ARCH_WANT_SYS_OLD_MMAP */

+static bool is_pfn_range_valid(struct zone *z,
+ unsigned long start_pfn, unsigned long nr_pages)
+{
+ unsigned long i, end_pfn = start_pfn + nr_pages;
+ struct page *page;
+
+ for (i = start_pfn; i < end_pfn; i++) {
+ if (!pfn_valid(i))
+ return false;
+
+ page = pfn_to_page(i);
+ if (page_zone(page) != z)
+ return false;
+
+ if (PageReserved(page))
+ return false;
+
+ if (page_count(page) > 0)
+ return false;
+
+ if (PageHuge(page))
+ return false;
+ }
+ return true;
+}
+
+struct page *
+alloc_pages_vma_contig(gfp_t gfp, int order, struct vm_area_struct *vma,
+ unsigned long addr, int node, bool hugepage)
+{
+ struct zonelist *zonelist = node_zonelist(node, gfp);
+ struct zoneref *z;
+ struct zone *zone;
+ unsigned long pfn, nr_pages, flags, ret;
+
+ nr_pages = 1 << order;
+ for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(gfp), NULL) {
+ spin_lock_irqsave(&zone->lock, flags);
+ pfn = ALIGN(zone->zone_start_pfn, nr_pages);
+ while (zone_spans_pfn(zone, pfn + nr_pages - 1)) {
+ if (is_pfn_range_valid(zone, pfn, nr_pages)) {
+ spin_unlock_irqrestore(&zone->lock, flags);
+ ret = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE, gfp);
+ if (!ret)
+ return pfn_to_page(pfn);
+ spin_lock_irqsave(&zone->lock, flags);
+ }
+ pfn += nr_pages;
+ }
+ spin_unlock_irqrestore(&zone->lock, flags);
+ }
+ return NULL;
+}
+
/*
* Attempt to allocate a contiguous range of pages to back the
* specified vma. vm_private_data is used as a 'pointer' to the
@@ -1588,11 +1642,19 @@ static long __alloc_vma_contig_range(struct vm_area_struct *vma)
* allocations < MAX_ORDER in size. However, this should really
* handle arbitrary size allocations.
*/
+
+ /*
if (order >= MAX_ORDER)
return -ENOMEM;

- vma->vm_private_data = alloc_pages_vma(gfp, order, vma, vma->vm_start,
- numa_node_id(), false);
+ */
+
+ if (order >= MAX_ORDER)
+ vma->vm_private_data = alloc_pages_vma_contig(gfp, order, vma,
+ vma->vm_start, numa_node_id(), false);
+ else
+ vma->vm_private_data = alloc_pages_vma(gfp, order, vma,
+ vma->vm_start, numa_node_id(), false);
if (!vma->vm_private_data)
return -ENOMEM;