Re: [PATCH v3 2/3] hugetlb: memcg: account hugetlb-backed memory in memory controller

From: Mike Kravetz
Date: Tue Oct 03 2023 - 13:14:14 EST


On 10/02/23 17:18, Nhat Pham wrote:
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index de220e3ff8be..74472e911b0a 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1902,6 +1902,7 @@ void free_huge_folio(struct folio *folio)
> pages_per_huge_page(h), folio);
> hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h),
> pages_per_huge_page(h), folio);
> + mem_cgroup_uncharge(folio);
> if (restore_reserve)
> h->resv_huge_pages++;
>
> @@ -3009,11 +3010,20 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
> struct hugepage_subpool *spool = subpool_vma(vma);
> struct hstate *h = hstate_vma(vma);
> struct folio *folio;
> - long map_chg, map_commit;
> + long map_chg, map_commit, nr_pages = pages_per_huge_page(h);
> long gbl_chg;
> - int ret, idx;
> + int memcg_charge_ret, ret, idx;
> struct hugetlb_cgroup *h_cg = NULL;
> + struct mem_cgroup *memcg;
> bool deferred_reserve;
> + gfp_t gfp = htlb_alloc_mask(h) | __GFP_RETRY_MAYFAIL;
> +
> + memcg = get_mem_cgroup_from_current();
> + memcg_charge_ret = mem_cgroup_hugetlb_try_charge(memcg, gfp, nr_pages);
> + if (memcg_charge_ret == -ENOMEM) {
> + mem_cgroup_put(memcg);
> + return ERR_PTR(-ENOMEM);
> + }
>
> idx = hstate_index(h);
> /*
> @@ -3022,8 +3032,12 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
> * code of zero indicates a reservation exists (no change).
> */
> map_chg = gbl_chg = vma_needs_reservation(h, vma, addr);
> - if (map_chg < 0)
> + if (map_chg < 0) {
> + if (!memcg_charge_ret)
> + mem_cgroup_cancel_charge(memcg, nr_pages);
> + mem_cgroup_put(memcg);
> return ERR_PTR(-ENOMEM);
> + }
>
> /*
> * Processes that did not create the mapping will have no
> @@ -3034,10 +3048,8 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
> */
> if (map_chg || avoid_reserve) {
> gbl_chg = hugepage_subpool_get_pages(spool, 1);
> - if (gbl_chg < 0) {
> - vma_end_reservation(h, vma, addr);
> - return ERR_PTR(-ENOSPC);
> - }
> + if (gbl_chg < 0)
> + goto out_end_reservation;
>
> /*
> * Even though there was no reservation in the region/reserve
> @@ -3119,6 +3131,11 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
> hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h),
> pages_per_huge_page(h), folio);
> }
> +
> + if (!memcg_charge_ret)
> + mem_cgroup_commit_charge(folio, memcg);
> + mem_cgroup_put(memcg);
> +
> return folio;
>
> out_uncharge_cgroup:
> @@ -3130,7 +3147,11 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
> out_subpool_put:
> if (map_chg || avoid_reserve)
> hugepage_subpool_put_pages(spool, 1);
> +out_end_reservation:
> vma_end_reservation(h, vma, addr);
> + if (!memcg_charge_ret)
> + mem_cgroup_cancel_charge(memcg, nr_pages);
> + mem_cgroup_put(memcg);
> return ERR_PTR(-ENOSPC);
> }
>

IIUC, huge page usage is charged in alloc_hugetlb_folio and uncharged in
free_huge_folio. During migration, huge pages are allocated via
alloc_migrate_hugetlb_folio, not alloc_hugetlb_folio. So, there is no
charging for the migration target page and we uncharge the source page.
It looks like there will be no charge for the huge page after migration?

If my analysis above is correct, then we may need to be careful about
this accounting. We may not want both source and target pages to be
charged at the same time.
--
Mike Kravetz