Re: [PATCH v9 2/8] hugetlb_cgroup: add interface for charge/uncharge hugetlb reservations
From: David Rientjes
Date: Mon Jan 13 2020 - 19:45:42 EST
On Mon, 13 Jan 2020, Mike Kravetz wrote:
> > diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
> > index 35415af9ed26f..b03270b0d5833 100644
> > --- a/mm/hugetlb_cgroup.c
> > +++ b/mm/hugetlb_cgroup.c
> > @@ -96,8 +96,12 @@ static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
> > int idx;
> >
> > for (idx = 0; idx < hugetlb_max_hstate; idx++) {
> > - if (page_counter_read(&h_cg->hugepage[idx]))
> > + if (page_counter_read(
> > + hugetlb_cgroup_get_counter(h_cg, idx, true)) ||
> > + page_counter_read(
> > + hugetlb_cgroup_get_counter(h_cg, idx, false))) {
> > return true;
> > + }
> > }
> > return false;
> > }
> > @@ -108,18 +112,32 @@ static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
> > int idx;
> >
> > for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) {
> > - struct page_counter *counter = &h_cgroup->hugepage[idx];
> > - struct page_counter *parent = NULL;
> > + struct page_counter *fault_parent = NULL;
> > + struct page_counter *reserved_parent = NULL;
> > unsigned long limit;
> > int ret;
> >
> > - if (parent_h_cgroup)
> > - parent = &parent_h_cgroup->hugepage[idx];
> > - page_counter_init(counter, parent);
> > + if (parent_h_cgroup) {
> > + fault_parent = hugetlb_cgroup_get_counter(
> > + parent_h_cgroup, idx, false);
> > + reserved_parent = hugetlb_cgroup_get_counter(
> > + parent_h_cgroup, idx, true);
> > + }
> > + page_counter_init(hugetlb_cgroup_get_counter(h_cgroup, idx,
> > + false),
> > + fault_parent);
> > + page_counter_init(hugetlb_cgroup_get_counter(h_cgroup, idx,
> > + true),
> > + reserved_parent);
> >
> > limit = round_down(PAGE_COUNTER_MAX,
> > 1 << huge_page_order(&hstates[idx]));
> > - ret = page_counter_set_max(counter, limit);
> > +
> > + ret = page_counter_set_max(
> > + hugetlb_cgroup_get_counter(h_cgroup, idx, false),
> > + limit);
> > + ret = page_counter_set_max(
> > + hugetlb_cgroup_get_counter(h_cgroup, idx, true), limit);
> > VM_BUG_ON(ret);
>
> The second page_counter_set_max() call overwrites ret before the check in
> VM_BUG_ON().
>
> > }
> > }
> > @@ -149,7 +167,6 @@ static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
> > kfree(h_cgroup);
> > }
> >
> > -
> > /*
> > * Should be called with hugetlb_lock held.
> > * Since we are holding hugetlb_lock, pages cannot get moved from
> > @@ -165,7 +182,7 @@ static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
> > struct hugetlb_cgroup *page_hcg;
> > struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);
> >
> > - page_hcg = hugetlb_cgroup_from_page(page);
> > + page_hcg = hugetlb_cgroup_from_page(page, false);
> > /*
> > * We can have pages in active list without any cgroup
> > * ie, hugepage with less than 3 pages. We can safely
> > @@ -184,7 +201,7 @@ static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
> > /* Take the pages off the local counter */
> > page_counter_cancel(counter, nr_pages);
> >
> > - set_hugetlb_cgroup(page, parent);
> > + set_hugetlb_cgroup(page, parent, false);
> > out:
> > return;
> > }
> > @@ -227,7 +244,7 @@ static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx,
> > }
> >
> > int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
> > - struct hugetlb_cgroup **ptr)
> > + struct hugetlb_cgroup **ptr, bool reserved)
> > {
> > int ret = 0;
> > struct page_counter *counter;
> > @@ -250,13 +267,20 @@ int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
> > }
> > rcu_read_unlock();
> >
> > - if (!page_counter_try_charge(&h_cg->hugepage[idx], nr_pages,
> > - &counter)) {
> > + if (!page_counter_try_charge(hugetlb_cgroup_get_counter(h_cg, idx,
> > + reserved),
> > + nr_pages, &counter)) {
> > ret = -ENOMEM;
> > hugetlb_event(hugetlb_cgroup_from_counter(counter, idx), idx,
> > HUGETLB_MAX);
> > + css_put(&h_cg->css);
> > + goto done;
> > }
> > - css_put(&h_cg->css);
> > + /* Reservations take a reference to the css because they do not get
> > + * reparented.
>
> I'm hoping someone with more cgroup knowledge can comment on this and any
> consequences of not reparenting reservations. We previously talked about
> why reparenting would be very difficult/expensive. I understand why you are
> nopt doing it. Just do not fully understand what needs to be done from the
> cgroup side.
>
I don't see any description of how hugetlb_cgroup currently acts wrt
reparenting in the last patch in the series and how this is the same or
different for reservations. I think the discussion that is referenced
here is probably lost in some previous posting of the series. I think
it's particularly useful information that the end user will need to know
about for its handling so it would benefit from some documentation in the
last patch.