Re: [PATCH 4/4] mm: prepare page_referenced() and page_idle to new THP refcounting

From: Kirill A. Shutemov
Date: Thu Nov 05 2015 - 12:28:01 EST


On Thu, Nov 05, 2015 at 07:03:24PM +0300, Vladimir Davydov wrote:
> On Tue, Nov 03, 2015 at 05:26:15PM +0200, Kirill A. Shutemov wrote:
> ...
> > @@ -812,60 +812,104 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
> > spinlock_t *ptl;
> > int referenced = 0;
> > struct page_referenced_arg *pra = arg;
> > + pgd_t *pgd;
> > + pud_t *pud;
> > + pmd_t *pmd;
> > + pte_t *pte;
> >
> > - if (unlikely(PageTransHuge(page))) {
> > - pmd_t *pmd;
> > -
> > - /*
> > - * rmap might return false positives; we must filter
> > - * these out using page_check_address_pmd().
> > - */
> > - pmd = page_check_address_pmd(page, mm, address, &ptl);
> > - if (!pmd)
> > + if (unlikely(PageHuge(page))) {
> > + /* when pud is not present, pte will be NULL */
> > + pte = huge_pte_offset(mm, address);
> > + if (!pte)
> > return SWAP_AGAIN;
> >
> > - if (vma->vm_flags & VM_LOCKED) {
> > + ptl = huge_pte_lockptr(page_hstate(page), mm, pte);
> > + goto check_pte;
> > + }
> > +
> > + pgd = pgd_offset(mm, address);
> > + if (!pgd_present(*pgd))
> > + return SWAP_AGAIN;
> > + pud = pud_offset(pgd, address);
> > + if (!pud_present(*pud))
> > + return SWAP_AGAIN;
> > + pmd = pmd_offset(pud, address);
> > +
> > + if (pmd_trans_huge(*pmd)) {
> > + int ret = SWAP_AGAIN;
> > +
> > + ptl = pmd_lock(mm, pmd);
> > + if (!pmd_present(*pmd))
> > + goto unlock_pmd;
> > + if (unlikely(!pmd_trans_huge(*pmd))) {
> > spin_unlock(ptl);
> > + goto map_pte;
> > + }
> > +
> > + if (pmd_page(*pmd) != page)
> > + goto unlock_pmd;
> > +
> > + if (vma->vm_flags & VM_LOCKED) {
> > pra->vm_flags |= VM_LOCKED;
> > - return SWAP_FAIL; /* To break the loop */
> > + ret = SWAP_FAIL; /* To break the loop */
> > + goto unlock_pmd;
> > }
> >
> > if (pmdp_clear_flush_young_notify(vma, address, pmd))
> > referenced++;
> > -
> > spin_unlock(ptl);
> > + goto found;
> > +unlock_pmd:
> > + spin_unlock(ptl);
> > + return ret;
> > } else {
> > - pte_t *pte;
> > -
> > - /*
> > - * rmap might return false positives; we must filter
> > - * these out using page_check_address().
> > - */
> > - pte = page_check_address(page, mm, address, &ptl, 0);
> > - if (!pte)
> > + pmd_t pmde = *pmd;
> > + barrier();
>
> This is supposed to be
>
> pmd_t pmde = READ_ONCE(*pmd);
>
> Right?

See e37c69827063. If I read this correctly, barrier() has less overhead than
READ_ONCE() on some archs.

>
> I don't understand why we need a barrier here. Why can't we just do
>
> } else if (!pmd_present(*pmd))
> return SWAP_AGAIN;
>
> ?

See f72e7dcdd252 too.

> > + if (!pmd_present(pmde) || pmd_trans_huge(pmde))
> > return SWAP_AGAIN;
> > + }
--
Kirill A. Shutemov
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/