Re: [PATCH 06/18] mm, hugetlb: remove vma_needs_reservation()

From: Naoya Horiguchi
Date: Mon Jul 29 2013 - 13:53:21 EST


Hi,

On Mon, Jul 29, 2013 at 02:31:57PM +0900, Joonsoo Kim wrote:
> vma_needs_reservation() can be substituted by vma_has_reserves()
> with minor changes. These functions do almost the same thing,
> so unifying them is better for maintainability.

I think it could, but not with only minor changes.
vma_has_reserves() is mostly the negation of vma_needs_reservation(),
but not exactly (consider that a vma with VM_NORESERVE neither has
nor needs any reservation).
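To spell out the difference as I read the current code (a rough
summary, not exact code):

	/*
	 * vma_needs_reservation():
	 *   VM_MAYSHARE        -> region_chg() (0 or 1, or an error)
	 *   private, not OWNER -> 1
	 *   private, OWNER     -> 0 (or an error from region_chg())
	 *   (VM_NORESERVE is never examined)
	 *
	 * vma_has_reserves():
	 *   VM_NORESERVE       -> 0, except for the VM_MAYSHARE &&
	 *                         chg == 0 workaround, which returns 1
	 *   VM_MAYSHARE        -> 1
	 *   private, OWNER     -> 1
	 *   otherwise          -> 0
	 */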

> Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@xxxxxxx>
>
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index bf2ee11..ff46a2c 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -451,8 +451,18 @@ void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
> vma->vm_private_data = (void *)0;
> }
>
> -/* Returns true if the VMA has associated reserve pages */
> -static int vma_has_reserves(struct vm_area_struct *vma, long chg)
> +/*
> + * Determine if the huge page at addr within the vma has an associated
> + * reservation. Where it does not we will need to logically increase
> + * reservation and actually increase subpool usage before an allocation
> + * can occur. Where any new reservation would be required the
> + * reservation change is prepared, but not committed. Once the page
> + * has been allocated from the subpool and instantiated the change should
> + * be committed via vma_commit_reservation. No action is required on
> + * failure.
> + */
> +static int vma_has_reserves(struct hstate *h,
> + struct vm_area_struct *vma, unsigned long addr)
> {
> if (vma->vm_flags & VM_NORESERVE) {
> /*

Could you add a more detailed comment on top of the function for better
maintainability? What is the expected behavior, and what are the return
values for each case, and for what reasons?
Also, this function is no longer a simple reserve checker; it can now
change reservation maps, so it would be nice to give it a more suitable
name, though I don't have any good suggestions.
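For example, something along these lines (just a sketch from my reading
of the new code, so please correct it where I'm wrong):

	/*
	 * Check whether the huge page at addr within vma is backed by a
	 * reservation, preparing (but not committing) a reservation map
	 * entry along the way:
	 *
	 *   1       - a reserve exists; the caller may consume it and
	 *             must not charge the subpool
	 *   0       - no reserve; the caller must charge the subpool
	 *             before allocating
	 *   -ENOMEM - region_chg() failed to prepare the map entry
	 *
	 * A prepared change must be committed with
	 * vma_commit_reservation() once the page is instantiated; no
	 * action is needed on failure.
	 */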

> @@ -464,10 +474,22 @@ static int vma_has_reserves(struct vm_area_struct *vma, long chg)
> * step. Currently, we don't have any other solution to deal
> * with this situation properly, so add work-around here.
> */
> - if (vma->vm_flags & VM_MAYSHARE && chg == 0)
> - return 1;
> - else
> - return 0;
> + if (vma->vm_flags & VM_MAYSHARE) {
> + struct address_space *mapping = vma->vm_file->f_mapping;
> + struct inode *inode = mapping->host;
> + pgoff_t idx = vma_hugecache_offset(h, vma, addr);
> + struct resv_map *resv = inode->i_mapping->private_data;

It would be nice if we could get the resv_map from the vma in a single
function for the VM_MAYSHARE case as well, so can you add that to
vma_resv_map()?
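Something like the following, perhaps (untested; the private-mapping
branch just reuses the existing helper's logic):

	static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
	{
		if (vma->vm_flags & VM_MAYSHARE) {
			struct address_space *mapping = vma->vm_file->f_mapping;
			struct inode *inode = mapping->host;

			return inode->i_mapping->private_data;
		}

		return (struct resv_map *)(get_vma_private_data(vma) &
							~HPAGE_RESV_MASK);
	}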

> + long chg;
> +
> + chg = region_chg(resv, idx, idx + 1);
> + if (chg < 0)
> + return -ENOMEM;
> +
> + if (chg == 0)
> + return 1;
> + }
> +
> + return 0;
> }
>
> /* Shared mappings always use reserves */
> @@ -478,8 +500,16 @@ static int vma_has_reserves(struct vm_area_struct *vma, long chg)
> * Only the process that called mmap() has reserves for
> * private mappings.
> */
> - if (is_vma_resv_set(vma, HPAGE_RESV_OWNER))
> + if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
> + pgoff_t idx = vma_hugecache_offset(h, vma, addr);
> + struct resv_map *resv = vma_resv_map(vma);
> +
> + /* Just for allocating region structure */
> + if (region_chg(resv, idx, idx + 1) < 0)
> + return -ENOMEM;
> +
> return 1;
> + }
>
> return 0;
> }
> @@ -542,8 +572,7 @@ static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
>
> static struct page *dequeue_huge_page_vma(struct hstate *h,
> struct vm_area_struct *vma,
> - unsigned long address, int avoid_reserve,
> - long chg)
> + unsigned long address, int avoid_reserve)
> {
> struct page *page = NULL;
> struct mempolicy *mpol;
> @@ -558,7 +587,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
> * have no page reserves. This check ensures that reservations are
> * not "stolen". The child may still get SIGKILLed
> */
> - if (!vma_has_reserves(vma, chg) &&
> + if (!vma_has_reserves(h, vma, address) &&
> h->free_huge_pages - h->resv_huge_pages == 0)
> return NULL;
>
> @@ -578,7 +607,7 @@ retry_cpuset:
> if (page) {
> if (avoid_reserve)
> break;
> - if (!vma_has_reserves(vma, chg))
> + if (!vma_has_reserves(h, vma, address))
> break;
>
> h->resv_huge_pages--;
> @@ -1077,42 +1106,6 @@ static void return_unused_surplus_pages(struct hstate *h,
> }
> }
>
> -/*
> - * Determine if the huge page at addr within the vma has an associated
> - * reservation. Where it does not we will need to logically increase
> - * reservation and actually increase subpool usage before an allocation
> - * can occur. Where any new reservation would be required the
> - * reservation change is prepared, but not committed. Once the page
> - * has been allocated from the subpool and instantiated the change should
> - * be committed via vma_commit_reservation. No action is required on
> - * failure.
> - */
> -static long vma_needs_reservation(struct hstate *h,
> - struct vm_area_struct *vma, unsigned long addr)
> -{
> - struct address_space *mapping = vma->vm_file->f_mapping;
> - struct inode *inode = mapping->host;
> -
> - if (vma->vm_flags & VM_MAYSHARE) {
> - pgoff_t idx = vma_hugecache_offset(h, vma, addr);
> - struct resv_map *resv = inode->i_mapping->private_data;
> -
> - return region_chg(resv, idx, idx + 1);
> -
> - } else if (!is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
> - return 1;
> -
> - } else {
> - long err;
> - pgoff_t idx = vma_hugecache_offset(h, vma, addr);
> - struct resv_map *resv = vma_resv_map(vma);
> -
> - err = region_chg(resv, idx, idx + 1);
> - if (err < 0)
> - return err;
> - return 0;
> - }
> -}
> static void vma_commit_reservation(struct hstate *h,
> struct vm_area_struct *vma, unsigned long addr)
> {
> @@ -1140,8 +1133,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
> struct hugepage_subpool *spool = subpool_vma(vma);
> struct hstate *h = hstate_vma(vma);
> struct page *page;
> - long chg;
> - int ret, idx;
> + int ret, idx, has_reserve;
> struct hugetlb_cgroup *h_cg;
>
> idx = hstate_index(h);
> @@ -1153,20 +1145,21 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
> * need pages and subpool limit allocated if no reserve
> * mapping overlaps.
> */
> - chg = vma_needs_reservation(h, vma, addr);
> - if (chg < 0)
> + has_reserve = vma_has_reserves(h, vma, addr);
> + if (has_reserve < 0)
> return ERR_PTR(-ENOMEM);
> - if (chg)
> - if (hugepage_subpool_get_pages(spool, chg))
> +
> + if (!has_reserve && (hugepage_subpool_get_pages(spool, 1) < 0))
> return ERR_PTR(-ENOSPC);
> ret = hugetlb_cgroup_charge_cgroup(idx, pages_per_huge_page(h), &h_cg);
> if (ret) {
> - hugepage_subpool_put_pages(spool, chg);
> + if (!has_reserve)
> + hugepage_subpool_put_pages(spool, 1);

It seems that this also fixes the subpool accounting, so it is worth
separating into another patch, or at least mentioning in the patch
description.

Thanks,
Naoya Horiguchi

> return ERR_PTR(-ENOSPC);
> }
> spin_lock(&hugetlb_lock);
> - page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, chg);
> + page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve);
> if (!page) {
> spin_unlock(&hugetlb_lock);
> page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
> @@ -1174,7 +1167,8 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
> hugetlb_cgroup_uncharge_cgroup(idx,
> pages_per_huge_page(h),
> h_cg);
> - hugepage_subpool_put_pages(spool, chg);
> + if (!has_reserve)
> + hugepage_subpool_put_pages(spool, 1);
> return ERR_PTR(-ENOSPC);
> }
> spin_lock(&hugetlb_lock);
> @@ -2769,7 +2763,7 @@ retry:
> * the spinlock.
> */
> if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED))
> - if (vma_needs_reservation(h, vma, address) < 0) {
> + if (vma_has_reserves(h, vma, address) < 0) {
> ret = VM_FAULT_OOM;
> goto backout_unlocked;
> }
> @@ -2860,7 +2854,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
> * consumed.
> */
> if ((flags & FAULT_FLAG_WRITE) && !huge_pte_write(entry)) {
> - if (vma_needs_reservation(h, vma, address) < 0) {
> + if (vma_has_reserves(h, vma, address) < 0) {
> ret = VM_FAULT_OOM;
> goto out_mutex;
> }
> --
> 1.7.9.5
>