[PATCHv5 02/28] rmap: add argument to charge compound page

From: Kirill A. Shutemov
Date: Thu Apr 23 2015 - 17:06:00 EST


We're going to allow mapping of individual 4k pages of a THP compound
page. This means we can no longer rely on the PageTransHuge() check to
decide whether to map/unmap a small page or the compound page as a
whole.

The patch adds a new argument to the rmap functions to indicate whether
we want to operate on the whole compound page or only on a single small
page.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
Tested-by: Sasha Levin <sasha.levin@xxxxxxxxxx>
---
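
[Editor's note, not part of the patch: a minimal sketch of the new
calling convention introduced below, with placeholder variables, for
readers skimming the diff.]

	/* Map a single 4k page (a small page of a THP, or a plain page): */
	page_add_anon_rmap(page, vma, address, false);

	/* Map the whole compound page in one call: */
	page_add_anon_rmap(page, vma, address, true);

	/*
	 * do_page_add_anon_rmap() carries the same information as
	 * bitflags, so exclusivity and compound-ness can be combined:
	 */
	do_page_add_anon_rmap(page, vma, address,
			      RMAP_EXCLUSIVE | RMAP_COMPOUND);
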
include/linux/rmap.h | 12 +++++++++---
kernel/events/uprobes.c | 4 ++--
mm/filemap_xip.c | 2 +-
mm/huge_memory.c | 16 ++++++++--------
mm/hugetlb.c | 4 ++--
mm/ksm.c | 4 ++--
mm/memory.c | 14 +++++++-------
mm/migrate.c | 8 ++++----
mm/rmap.c | 43 +++++++++++++++++++++++++++----------------
mm/swapfile.c | 4 ++--
10 files changed, 64 insertions(+), 47 deletions(-)

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index d3630fa3a17b..e7ecba43ae71 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -159,16 +159,22 @@ static inline void anon_vma_merge(struct vm_area_struct *vma,

struct anon_vma *page_get_anon_vma(struct page *page);

+/* bitflags for do_page_add_anon_rmap() */
+#define RMAP_EXCLUSIVE 0x01
+#define RMAP_COMPOUND 0x02
+
/*
* rmap interfaces called when adding or removing pte of page
*/
void page_move_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
-void page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
+void page_add_anon_rmap(struct page *, struct vm_area_struct *,
+ unsigned long, bool);
void do_page_add_anon_rmap(struct page *, struct vm_area_struct *,
unsigned long, int);
-void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
+void page_add_new_anon_rmap(struct page *, struct vm_area_struct *,
+ unsigned long, bool);
void page_add_file_rmap(struct page *);
-void page_remove_rmap(struct page *);
+void page_remove_rmap(struct page *, bool);

void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *,
unsigned long);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index cb346f26a22d..5523daf59953 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -183,7 +183,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
goto unlock;

get_page(kpage);
- page_add_new_anon_rmap(kpage, vma, addr);
+ page_add_new_anon_rmap(kpage, vma, addr, false);
mem_cgroup_commit_charge(kpage, memcg, false);
lru_cache_add_active_or_unevictable(kpage, vma);

@@ -196,7 +196,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
ptep_clear_flush_notify(vma, addr, ptep);
set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));

- page_remove_rmap(page);
+ page_remove_rmap(page, false);
if (!page_mapped(page))
try_to_free_swap(page);
pte_unmap_unlock(ptep, ptl);
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index c175f9f25210..791d9043a983 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -189,7 +189,7 @@ retry:
/* Nuke the page table entry. */
flush_cache_page(vma, address, pte_pfn(*pte));
pteval = ptep_clear_flush(vma, address, pte);
- page_remove_rmap(page);
+ page_remove_rmap(page, false);
dec_mm_counter(mm, MM_FILEPAGES);
BUG_ON(pte_dirty(pteval));
pte_unmap_unlock(pte, ptl);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 5a137c3a7f2f..b40fc0ff9315 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -752,7 +752,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
pmd_t entry;
entry = mk_huge_pmd(page, vma->vm_page_prot);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
- page_add_new_anon_rmap(page, vma, haddr);
+ page_add_new_anon_rmap(page, vma, haddr, true);
mem_cgroup_commit_charge(page, memcg, false);
lru_cache_add_active_or_unevictable(page, vma);
pgtable_trans_huge_deposit(mm, pmd, pgtable);
@@ -1043,7 +1043,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
memcg = (void *)page_private(pages[i]);
set_page_private(pages[i], 0);
- page_add_new_anon_rmap(pages[i], vma, haddr);
+ page_add_new_anon_rmap(pages[i], vma, haddr, false);
mem_cgroup_commit_charge(pages[i], memcg, false);
lru_cache_add_active_or_unevictable(pages[i], vma);
pte = pte_offset_map(&_pmd, haddr);
@@ -1055,7 +1055,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,

smp_wmb(); /* make pte visible before pmd */
pmd_populate(mm, pmd, pgtable);
- page_remove_rmap(page);
+ page_remove_rmap(page, true);
spin_unlock(ptl);

mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
@@ -1175,7 +1175,7 @@ alloc:
entry = mk_huge_pmd(new_page, vma->vm_page_prot);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
pmdp_clear_flush_notify(vma, haddr, pmd);
- page_add_new_anon_rmap(new_page, vma, haddr);
+ page_add_new_anon_rmap(new_page, vma, haddr, true);
mem_cgroup_commit_charge(new_page, memcg, false);
lru_cache_add_active_or_unevictable(new_page, vma);
set_pmd_at(mm, haddr, pmd, entry);
@@ -1185,7 +1185,7 @@ alloc:
put_huge_zero_page();
} else {
VM_BUG_ON_PAGE(!PageHead(page), page);
- page_remove_rmap(page);
+ page_remove_rmap(page, true);
put_page(page);
}
ret |= VM_FAULT_WRITE;
@@ -1440,7 +1440,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
put_huge_zero_page();
} else {
page = pmd_page(orig_pmd);
- page_remove_rmap(page);
+ page_remove_rmap(page, true);
VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
VM_BUG_ON_PAGE(!PageHead(page), page);
@@ -2285,7 +2285,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
* superfluous.
*/
pte_clear(vma->vm_mm, address, _pte);
- page_remove_rmap(src_page);
+ page_remove_rmap(src_page, false);
spin_unlock(ptl);
free_page_and_swap_cache(src_page);
}
@@ -2580,7 +2580,7 @@ static void collapse_huge_page(struct mm_struct *mm,

spin_lock(pmd_ptl);
BUG_ON(!pmd_none(*pmd));
- page_add_new_anon_rmap(new_page, vma, address);
+ page_add_new_anon_rmap(new_page, vma, address, true);
mem_cgroup_commit_charge(new_page, memcg, false);
lru_cache_add_active_or_unevictable(new_page, vma);
pgtable_trans_huge_deposit(mm, pmd, pgtable);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e8c92ae35b4b..eb2a0430535e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2797,7 +2797,7 @@ again:
if (huge_pte_dirty(pte))
set_page_dirty(page);

- page_remove_rmap(page);
+ page_remove_rmap(page, true);
force_flush = !__tlb_remove_page(tlb, page);
if (force_flush) {
address += sz;
@@ -3018,7 +3018,7 @@ retry_avoidcopy:
mmu_notifier_invalidate_range(mm, mmun_start, mmun_end);
set_huge_pte_at(mm, address, ptep,
make_huge_pte(vma, new_page, 1));
- page_remove_rmap(old_page);
+ page_remove_rmap(old_page, true);
hugepage_add_new_anon_rmap(new_page, vma, address);
/* Make the old page be freed below */
new_page = old_page;
diff --git a/mm/ksm.c b/mm/ksm.c
index bc7be0ee2080..fe09f3ddc912 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -957,13 +957,13 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
}

get_page(kpage);
- page_add_anon_rmap(kpage, vma, addr);
+ page_add_anon_rmap(kpage, vma, addr, false);

flush_cache_page(vma, addr, pte_pfn(*ptep));
ptep_clear_flush_notify(vma, addr, ptep);
set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));

- page_remove_rmap(page);
+ page_remove_rmap(page, false);
if (!page_mapped(page))
try_to_free_swap(page);
put_page(page);
diff --git a/mm/memory.c b/mm/memory.c
index f150f7ed4e84..d6171752ea59 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1122,7 +1122,7 @@ again:
mark_page_accessed(page);
rss[MM_FILEPAGES]--;
}
- page_remove_rmap(page);
+ page_remove_rmap(page, false);
if (unlikely(page_mapcount(page) < 0))
print_bad_pte(vma, addr, ptent, page);
if (unlikely(!__tlb_remove_page(tlb, page))) {
@@ -2108,7 +2108,7 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
* thread doing COW.
*/
ptep_clear_flush_notify(vma, address, page_table);
- page_add_new_anon_rmap(new_page, vma, address);
+ page_add_new_anon_rmap(new_page, vma, address, false);
mem_cgroup_commit_charge(new_page, memcg, false);
lru_cache_add_active_or_unevictable(new_page, vma);
/*
@@ -2141,7 +2141,7 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
* mapcount is visible. So transitively, TLBs to
* old page will be flushed before it can be reused.
*/
- page_remove_rmap(old_page);
+ page_remove_rmap(old_page, false);
}

/* Free the old page.. */
@@ -2556,7 +2556,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
pte = maybe_mkwrite(pte_mkdirty(pte), vma);
flags &= ~FAULT_FLAG_WRITE;
ret |= VM_FAULT_WRITE;
- exclusive = 1;
+ exclusive = RMAP_EXCLUSIVE;
}
flush_icache_page(vma, page);
if (pte_swp_soft_dirty(orig_pte))
@@ -2566,7 +2566,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
do_page_add_anon_rmap(page, vma, address, exclusive);
mem_cgroup_commit_charge(page, memcg, true);
} else { /* ksm created a completely new copy */
- page_add_new_anon_rmap(page, vma, address);
+ page_add_new_anon_rmap(page, vma, address, false);
mem_cgroup_commit_charge(page, memcg, false);
lru_cache_add_active_or_unevictable(page, vma);
}
@@ -2704,7 +2704,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
goto release;

inc_mm_counter_fast(mm, MM_ANONPAGES);
- page_add_new_anon_rmap(page, vma, address);
+ page_add_new_anon_rmap(page, vma, address, false);
mem_cgroup_commit_charge(page, memcg, false);
lru_cache_add_active_or_unevictable(page, vma);
setpte:
@@ -2787,7 +2787,7 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
if (anon) {
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
- page_add_new_anon_rmap(page, vma, address);
+ page_add_new_anon_rmap(page, vma, address, false);
} else {
inc_mm_counter_fast(vma->vm_mm, MM_FILEPAGES);
page_add_file_rmap(page);
diff --git a/mm/migrate.c b/mm/migrate.c
index 022adc253cd4..9a380238a4d0 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -166,7 +166,7 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
else
page_dup_rmap(new);
} else if (PageAnon(new))
- page_add_anon_rmap(new, vma, addr);
+ page_add_anon_rmap(new, vma, addr, false);
else
page_add_file_rmap(new);

@@ -1795,7 +1795,7 @@ fail_putback:
* guarantee the copy is visible before the pagetable update.
*/
flush_cache_range(vma, mmun_start, mmun_end);
- page_add_anon_rmap(new_page, vma, mmun_start);
+ page_add_anon_rmap(new_page, vma, mmun_start, true);
pmdp_clear_flush_notify(vma, mmun_start, pmd);
set_pmd_at(mm, mmun_start, pmd, entry);
flush_tlb_range(vma, mmun_start, mmun_end);
@@ -1806,13 +1806,13 @@ fail_putback:
flush_tlb_range(vma, mmun_start, mmun_end);
mmu_notifier_invalidate_range(mm, mmun_start, mmun_end);
update_mmu_cache_pmd(vma, address, &entry);
- page_remove_rmap(new_page);
+ page_remove_rmap(new_page, true);
goto fail_putback;
}

mem_cgroup_migrate(page, new_page, false);

- page_remove_rmap(page);
+ page_remove_rmap(page, true);

spin_unlock(ptl);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
diff --git a/mm/rmap.c b/mm/rmap.c
index dad23a43e42c..4ca4b5cffd95 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1048,9 +1048,9 @@ static void __page_check_anon_rmap(struct page *page,
* (but PageKsm is never downgraded to PageAnon).
*/
void page_add_anon_rmap(struct page *page,
- struct vm_area_struct *vma, unsigned long address)
+ struct vm_area_struct *vma, unsigned long address, bool compound)
{
- do_page_add_anon_rmap(page, vma, address, 0);
+ do_page_add_anon_rmap(page, vma, address, compound ? RMAP_COMPOUND : 0);
}

/*
@@ -1059,21 +1059,24 @@ void page_add_anon_rmap(struct page *page,
* Everybody else should continue to use page_add_anon_rmap above.
*/
void do_page_add_anon_rmap(struct page *page,
- struct vm_area_struct *vma, unsigned long address, int exclusive)
+ struct vm_area_struct *vma, unsigned long address, int flags)
{
int first = atomic_inc_and_test(&page->_mapcount);
if (first) {
+ bool compound = flags & RMAP_COMPOUND;
+ int nr = compound ? hpage_nr_pages(page) : 1;
/*
* We use the irq-unsafe __{inc|mod}_zone_page_stat because
* these counters are not modified in interrupt context, and
* pte lock(a spinlock) is held, which implies preemption
* disabled.
*/
- if (PageTransHuge(page))
+ if (compound) {
+ VM_BUG_ON_PAGE(!PageTransHuge(page), page);
__inc_zone_page_state(page,
NR_ANON_TRANSPARENT_HUGEPAGES);
- __mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
- hpage_nr_pages(page));
+ }
+ __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, nr);
}
if (unlikely(PageKsm(page)))
return;
@@ -1081,7 +1084,8 @@ void do_page_add_anon_rmap(struct page *page,
VM_BUG_ON_PAGE(!PageLocked(page), page);
/* address might be in next vma when migration races vma_adjust */
if (first)
- __page_set_anon_rmap(page, vma, address, exclusive);
+ __page_set_anon_rmap(page, vma, address,
+ flags & RMAP_EXCLUSIVE);
else
__page_check_anon_rmap(page, vma, address);
}
@@ -1097,15 +1101,18 @@ void do_page_add_anon_rmap(struct page *page,
* Page does not have to be locked.
*/
void page_add_new_anon_rmap(struct page *page,
- struct vm_area_struct *vma, unsigned long address)
+ struct vm_area_struct *vma, unsigned long address, bool compound)
{
+ int nr = compound ? hpage_nr_pages(page) : 1;
+
VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
SetPageSwapBacked(page);
atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
- if (PageTransHuge(page))
+ if (compound) {
+ VM_BUG_ON_PAGE(!PageTransHuge(page), page);
__inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
- __mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
- hpage_nr_pages(page));
+ }
+ __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, nr);
__page_set_anon_rmap(page, vma, address, 1);
}

@@ -1161,9 +1168,12 @@ out:
*
* The caller needs to hold the pte lock.
*/
-void page_remove_rmap(struct page *page)
+void page_remove_rmap(struct page *page, bool compound)
{
+ int nr = compound ? hpage_nr_pages(page) : 1;
+
if (!PageAnon(page)) {
+ VM_BUG_ON_PAGE(compound && !PageHuge(page), page);
page_remove_file_rmap(page);
return;
}
@@ -1181,11 +1191,12 @@ void page_remove_rmap(struct page *page)
* these counters are not modified in interrupt context, and
* pte lock(a spinlock) is held, which implies preemption disabled.
*/
- if (PageTransHuge(page))
+ if (compound) {
+ VM_BUG_ON_PAGE(!PageTransHuge(page), page);
__dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
+ }

- __mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
- -hpage_nr_pages(page));
+ __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, -nr);

if (unlikely(PageMlocked(page)))
clear_page_mlock(page);
@@ -1327,7 +1338,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
dec_mm_counter(mm, MM_FILEPAGES);

discard:
- page_remove_rmap(page);
+ page_remove_rmap(page, false);
page_cache_release(page);

out_unmap:
diff --git a/mm/swapfile.c b/mm/swapfile.c
index a7e72103f23b..65825c2687f5 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1121,10 +1121,10 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
set_pte_at(vma->vm_mm, addr, pte,
pte_mkold(mk_pte(page, vma->vm_page_prot)));
if (page == swapcache) {
- page_add_anon_rmap(page, vma, addr);
+ page_add_anon_rmap(page, vma, addr, false);
mem_cgroup_commit_charge(page, memcg, true);
} else { /* ksm created a completely new copy */
- page_add_new_anon_rmap(page, vma, addr);
+ page_add_new_anon_rmap(page, vma, addr, false);
mem_cgroup_commit_charge(page, memcg, false);
lru_cache_add_active_or_unevictable(page, vma);
}
--
2.1.4