Re: mm/memory.c: Add update local tlb for smp race
From: maobibo
Date: Sun May 17 2020 - 23:39:48 EST
On 05/16/2020 05:34 PM, maobibo wrote:
>
>
> On 05/15/2020 09:50 PM, David Hildenbrand wrote:
>> On 14.05.20 08:50, Bibo Mao wrote:
>>> If there are two threads hitting page fault at the address, one
>>> thread updates pte entry and local tlb, the other thread can update
>>> local tlb also, rather than give up and let page fault happening
>>> again.
>>
>> Let me suggest
>>
>> "mm/memory: optimize concurrent page faults at same address
>>
>> If two threads concurrently fault at the same address, the thread that
>> won the race updates the PTE and its local TLB. For now, the other
>> thread gives up, simply does nothing, and continues.
>>
>> It could happen that this second thread triggers another fault, whereby
>> it only updates its local TLB while handling the fault. Instead of
>> triggering another fault, let's directly update the local TLB of the
>> second thread.
>> "
>>
>> If I got the intention of this patch correctly.
>>
>> Are there any performance numbers to support this patch?
>>
>> (I can't say too much about the correctness and/or usefulness of this patch)
>
> yes, that is the situation. On MIPS platform software can update TLB,
> so update_mmu_cache is used here. This does not happen frequently, and with the three series patches in later mail. I test lat_pagefault in lmbench, here is is result:
>
> with these three series patches,
> # ./lat_pagefault -N 10 /tmp/1
> Pagefaults on /tmp/1: 1.4973 microseconds
> # ./lat_pagefault -P 4 -N 10 /tmp/1
> Pagefaults on /tmp/1: 1.5716 microseconds
>
> original version, without these three series patch
> # ./lat_pagefault -N 10 /tmp/1
> Pagefaults on /tmp/1: 1.6489 microseconds
> # ./lat_pagefault -P 4 -N 10 /tmp/1
> Pagefaults on /tmp/1: 1.7214 microseconds
>
I tested the three patches one by one, there is no obvious improvement with
lat_pagefault case, I guess that it happens seldom where multiple threads access
the same page at the same time.
The improvement is because of another modification where pte_mkyoung is added
to get readable privilege on MIPS system.
regards
bibo, mao
>>>
>>> modified: mm/memory.c
>>
>> This does not belong into a patch description.
>
> well, I will modify the patch description.
>
> regards
> bibo,mao
>
>
>>
>>
>>> Signed-off-by: Bibo Mao <maobibo@xxxxxxxxxxx>
>>> ---
>>> mm/memory.c | 30 ++++++++++++++++++++++--------
>>> 1 file changed, 22 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/mm/memory.c b/mm/memory.c
>>> index f703fe8..3a741ce 100644
>>> --- a/mm/memory.c
>>> +++ b/mm/memory.c
>>> @@ -2436,11 +2436,10 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
>>> if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) {
>>> /*
>>> * Other thread has already handled the fault
>>> - * and we don't need to do anything. If it's
>>> - * not the case, the fault will be triggered
>>> - * again on the same address.
>>> + * and update local tlb only
>>> */
>>> ret = false;
>>> + update_mmu_cache(vma, addr, vmf->pte);
>>> goto pte_unlock;
>>> }
>>>
>>> @@ -2463,8 +2462,9 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
>>> vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl);
>>> locked = true;
>>> if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) {
>>> - /* The PTE changed under us. Retry page fault. */
>>> + /* The PTE changed under us. update local tlb */
>>> ret = false;
>>> + update_mmu_cache(vma, addr, vmf->pte);
>>> goto pte_unlock;
>>> }
>>>
>>> @@ -2704,6 +2704,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
>>> }
>>> flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
>>> entry = mk_pte(new_page, vma->vm_page_prot);
>>> + entry = pte_mkyoung(entry);
>>> entry = maybe_mkwrite(pte_mkdirty(entry), vma);
>>> /*
>>> * Clear the pte entry and flush it first, before updating the
>>> @@ -2752,6 +2753,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
>>> new_page = old_page;
>>> page_copied = 1;
>>> } else {
>>> + update_mmu_cache(vma, vmf->address, vmf->pte);
>>> mem_cgroup_cancel_charge(new_page, memcg, false);
>>> }
>>>
>>> @@ -2812,6 +2814,7 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf)
>>> * pte_offset_map_lock.
>>> */
>>> if (!pte_same(*vmf->pte, vmf->orig_pte)) {
>>> + update_mmu_cache(vmf->vma, vmf->address, vmf->pte);
>>> pte_unmap_unlock(vmf->pte, vmf->ptl);
>>> return VM_FAULT_NOPAGE;
>>> }
>>> @@ -2936,6 +2939,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
>>> vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
>>> vmf->address, &vmf->ptl);
>>> if (!pte_same(*vmf->pte, vmf->orig_pte)) {
>>> + update_mmu_cache(vma, vmf->address, vmf->pte);
>>> unlock_page(vmf->page);
>>> pte_unmap_unlock(vmf->pte, vmf->ptl);
>>> put_page(vmf->page);
>>> @@ -3341,8 +3345,10 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
>>> vma->vm_page_prot));
>>> vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
>>> vmf->address, &vmf->ptl);
>>> - if (!pte_none(*vmf->pte))
>>> + if (!pte_none(*vmf->pte)) {
>>> + update_mmu_cache(vma, vmf->address, vmf->pte);
>>> goto unlock;
>>> + }
>>> ret = check_stable_address_space(vma->vm_mm);
>>> if (ret)
>>> goto unlock;
>>> @@ -3373,13 +3379,16 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
>>> __SetPageUptodate(page);
>>>
>>> entry = mk_pte(page, vma->vm_page_prot);
>>> + entry = pte_mkyoung(entry);
>>> if (vma->vm_flags & VM_WRITE)
>>> entry = pte_mkwrite(pte_mkdirty(entry));
>>>
>>> vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
>>> &vmf->ptl);
>>> - if (!pte_none(*vmf->pte))
>>> + if (!pte_none(*vmf->pte)) {
>>> + update_mmu_cache(vma, vmf->address, vmf->pte);
>>> goto release;
>>> + }
>>>
>>> ret = check_stable_address_space(vma->vm_mm);
>>> if (ret)
>>> @@ -3646,11 +3655,14 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
>>> }
>>>
>>> /* Re-check under ptl */
>>> - if (unlikely(!pte_none(*vmf->pte)))
>>> + if (unlikely(!pte_none(*vmf->pte))) {
>>> + update_mmu_cache(vma, vmf->address, vmf->pte);
>>> return VM_FAULT_NOPAGE;
>>> + }
>>>
>>> flush_icache_page(vma, page);
>>> entry = mk_pte(page, vma->vm_page_prot);
>>> + entry = pte_mkyoung(entry);
>>> if (write)
>>> entry = maybe_mkwrite(pte_mkdirty(entry), vma);
>>> /* copy-on-write page */
>>> @@ -4224,8 +4236,10 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
>>> vmf->ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd);
>>> spin_lock(vmf->ptl);
>>> entry = vmf->orig_pte;
>>> - if (unlikely(!pte_same(*vmf->pte, entry)))
>>> + if (unlikely(!pte_same(*vmf->pte, entry))) {
>>> + update_mmu_cache(vmf->vma, vmf->address, vmf->pte);
>>> goto unlock;
>>> + }
>>> if (vmf->flags & FAULT_FLAG_WRITE) {
>>> if (!pte_write(entry))
>>> return do_wp_page(vmf);
>>>
>>
>>