[PATCHv3 07/41] mm, shmem: swich huge tmpfs to multi-order radix-tree entries

From: Kirill A. Shutemov
Date: Thu Sep 15 2016 - 08:11:59 EST


We would need to use multi-order radix-tree entires for ext4 and other
filesystems to have coherent view on tags (dirty/towrite) in the tree.

This patch converts huge tmpfs implementation to multi-order entries, so
we will be able to use the same code patch for all filesystems.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
---
mm/filemap.c | 320 +++++++++++++++++++++++++++++++++----------------------
mm/huge_memory.c | 47 +++++---
mm/khugepaged.c | 26 ++---
mm/shmem.c | 36 ++-----
4 files changed, 247 insertions(+), 182 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 8a287dfc5372..ac3a39b1fe6d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -114,7 +114,7 @@ static void page_cache_tree_delete(struct address_space *mapping,
struct page *page, void *shadow)
{
struct radix_tree_node *node;
- int i, nr = PageHuge(page) ? 1 : hpage_nr_pages(page);
+ int nr = PageHuge(page) ? 1 : hpage_nr_pages(page);

VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(PageTail(page), page);
@@ -132,36 +132,32 @@ static void page_cache_tree_delete(struct address_space *mapping,
}
mapping->nrpages -= nr;

- for (i = 0; i < nr; i++) {
- node = radix_tree_replace_clear_tags(&mapping->page_tree,
- page->index + i, shadow);
- if (!node) {
- VM_BUG_ON_PAGE(nr != 1, page);
- return;
- }
+ node = radix_tree_replace_clear_tags(&mapping->page_tree,
+ page->index, shadow);
+ if (!node)
+ return;

- workingset_node_pages_dec(node);
- if (shadow)
- workingset_node_shadows_inc(node);
- else
- if (__radix_tree_delete_node(&mapping->page_tree, node))
- continue;
+ workingset_node_pages_dec(node);
+ if (shadow)
+ workingset_node_shadows_inc(node);
+ else
+ if (__radix_tree_delete_node(&mapping->page_tree, node))
+ return;

- /*
- * Track node that only contains shadow entries. DAX mappings
- * contain no shadow entries and may contain other exceptional
- * entries so skip those.
- *
- * Avoid acquiring the list_lru lock if already tracked.
- * The list_empty() test is safe as node->private_list is
- * protected by mapping->tree_lock.
- */
- if (!dax_mapping(mapping) && !workingset_node_pages(node) &&
- list_empty(&node->private_list)) {
- node->private_data = mapping;
- list_lru_add(&workingset_shadow_nodes,
- &node->private_list);
- }
+ /*
+ * Track node that only contains shadow entries. DAX mappings
+ * contain no shadow entries and may contain other exceptional
+ * entries so skip those.
+ *
+ * Avoid acquiring the list_lru lock if already tracked.
+ * The list_empty() test is safe as node->private_list is
+ * protected by mapping->tree_lock.
+ */
+ if (!dax_mapping(mapping) && !workingset_node_pages(node) &&
+ list_empty(&node->private_list)) {
+ node->private_data = mapping;
+ list_lru_add(&workingset_shadow_nodes,
+ &node->private_list);
}
}

@@ -264,12 +260,7 @@ void delete_from_page_cache(struct page *page)
if (freepage)
freepage(page);

- if (PageTransHuge(page) && !PageHuge(page)) {
- page_ref_sub(page, HPAGE_PMD_NR);
- VM_BUG_ON_PAGE(page_count(page) <= 0, page);
- } else {
- put_page(page);
- }
+ put_page(page);
}
EXPORT_SYMBOL(delete_from_page_cache);

@@ -1073,7 +1064,7 @@ EXPORT_SYMBOL(page_cache_prev_hole);
struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
{
void **pagep;
- struct page *head, *page;
+ struct page *page;

rcu_read_lock();
repeat:
@@ -1094,25 +1085,25 @@ repeat:
goto out;
}

- head = compound_head(page);
- if (!page_cache_get_speculative(head))
+ if (!page_cache_get_speculative(page))
goto repeat;

- /* The page was split under us? */
- if (compound_head(page) != head) {
- put_page(head);
- goto repeat;
- }
-
/*
* Has the page moved?
* This is part of the lockless pagecache protocol. See
* include/linux/pagemap.h for details.
*/
if (unlikely(page != *pagep)) {
- put_page(head);
+ put_page(page);
goto repeat;
}
+
+ /* For multi-order entries, find relevant subpage */
+ if (PageTransHuge(page)) {
+ VM_BUG_ON(offset - page->index < 0);
+ VM_BUG_ON(offset - page->index >= HPAGE_PMD_NR);
+ page += offset - page->index;
+ }
}
out:
rcu_read_unlock();
@@ -1275,7 +1266,7 @@ unsigned find_get_entries(struct address_space *mapping,
struct page **entries, pgoff_t *indices)
{
void **slot;
- unsigned int ret = 0;
+ unsigned int refs, ret = 0;
struct radix_tree_iter iter;

if (!nr_entries)
@@ -1283,7 +1274,10 @@ unsigned find_get_entries(struct address_space *mapping,

rcu_read_lock();
radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
- struct page *head, *page;
+ struct page *page;
+ unsigned long index = iter.index;
+ if (index < start)
+ index = start;
repeat:
page = radix_tree_deref_slot(slot);
if (unlikely(!page))
@@ -1301,26 +1295,38 @@ repeat:
goto export;
}

- head = compound_head(page);
- if (!page_cache_get_speculative(head))
+ if (!page_cache_get_speculative(page))
goto repeat;

- /* The page was split under us? */
- if (compound_head(page) != head) {
- put_page(head);
- goto repeat;
- }
-
/* Has the page moved? */
if (unlikely(page != *slot)) {
- put_page(head);
+ put_page(page);
goto repeat;
}
+
+ /* For multi-order entries, find relevant subpage */
+ if (PageTransHuge(page)) {
+ VM_BUG_ON(index - page->index < 0);
+ VM_BUG_ON(index - page->index >= HPAGE_PMD_NR);
+ page += index - page->index;
+ }
export:
- indices[ret] = iter.index;
+ indices[ret] = index;
entries[ret] = page;
if (++ret == nr_entries)
break;
+ if (radix_tree_exception(page) || !PageTransCompound(page))
+ continue;
+ for (refs = 0; ret < nr_entries &&
+ (index + 1) % HPAGE_PMD_NR;
+ ret++, refs++) {
+ indices[ret] = ++index;
+ entries[ret] = ++page;
+ }
+ if (refs)
+ page_ref_add(compound_head(page), refs);
+ if (ret == nr_entries)
+ break;
}
rcu_read_unlock();
return ret;
@@ -1347,14 +1353,17 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
{
struct radix_tree_iter iter;
void **slot;
- unsigned ret = 0;
+ unsigned refs, ret = 0;

if (unlikely(!nr_pages))
return 0;

rcu_read_lock();
radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
- struct page *head, *page;
+ struct page *page;
+ unsigned long index = iter.index;
+ if (index < start)
+ index = start;
repeat:
page = radix_tree_deref_slot(slot);
if (unlikely(!page))
@@ -1373,25 +1382,35 @@ repeat:
continue;
}

- head = compound_head(page);
- if (!page_cache_get_speculative(head))
+ if (!page_cache_get_speculative(page))
goto repeat;

- /* The page was split under us? */
- if (compound_head(page) != head) {
- put_page(head);
- goto repeat;
- }
-
/* Has the page moved? */
if (unlikely(page != *slot)) {
- put_page(head);
+ put_page(page);
goto repeat;
}

+ /* For multi-order entries, find relevant subpage */
+ if (PageTransHuge(page)) {
+ VM_BUG_ON(index - page->index < 0);
+ VM_BUG_ON(index - page->index >= HPAGE_PMD_NR);
+ page += index - page->index;
+ }
+
pages[ret] = page;
if (++ret == nr_pages)
break;
+ if (!PageTransCompound(page))
+ continue;
+ for (refs = 0; ret < nr_pages &&
+ (index + 1) % HPAGE_PMD_NR;
+ ret++, refs++, index++)
+ pages[ret] = ++page;
+ if (refs)
+ page_ref_add(compound_head(page), refs);
+ if (ret == nr_pages)
+ break;
}

rcu_read_unlock();
@@ -1410,19 +1429,22 @@ repeat:
*
* find_get_pages_contig() returns the number of pages which were found.
*/
-unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
+unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
unsigned int nr_pages, struct page **pages)
{
struct radix_tree_iter iter;
void **slot;
- unsigned int ret = 0;
+ unsigned int refs, ret = 0;

if (unlikely(!nr_pages))
return 0;

rcu_read_lock();
- radix_tree_for_each_contig(slot, &mapping->page_tree, &iter, index) {
- struct page *head, *page;
+ radix_tree_for_each_contig(slot, &mapping->page_tree, &iter, start) {
+ struct page *page;
+ unsigned long index = iter.index;
+ if (index < start)
+ index = start;
repeat:
page = radix_tree_deref_slot(slot);
/* The hole, there no reason to continue */
@@ -1442,19 +1464,12 @@ repeat:
break;
}

- head = compound_head(page);
- if (!page_cache_get_speculative(head))
+ if (!page_cache_get_speculative(page))
goto repeat;

- /* The page was split under us? */
- if (compound_head(page) != head) {
- put_page(head);
- goto repeat;
- }
-
/* Has the page moved? */
if (unlikely(page != *slot)) {
- put_page(head);
+ put_page(page);
goto repeat;
}

@@ -1463,14 +1478,31 @@ repeat:
* otherwise we can get both false positives and false
* negatives, which is just confusing to the caller.
*/
- if (page->mapping == NULL || page_to_pgoff(page) != iter.index) {
+ if (page->mapping == NULL || page_to_pgoff(page) != index) {
put_page(page);
break;
}

+ /* For multi-order entries, find relevant subpage */
+ if (PageTransHuge(page)) {
+ VM_BUG_ON(index - page->index < 0);
+ VM_BUG_ON(index - page->index >= HPAGE_PMD_NR);
+ page += index - page->index;
+ }
+
pages[ret] = page;
if (++ret == nr_pages)
break;
+ if (!PageTransCompound(page))
+ continue;
+ for (refs = 0; ret < nr_pages &&
+ (index + 1) % HPAGE_PMD_NR;
+ ret++, refs++, index++)
+ pages[ret] = ++page;
+ if (refs)
+ page_ref_add(compound_head(page), refs);
+ if (ret == nr_pages)
+ break;
}
rcu_read_unlock();
return ret;
@@ -1488,20 +1520,23 @@ EXPORT_SYMBOL(find_get_pages_contig);
* Like find_get_pages, except we only return pages which are tagged with
* @tag. We update @index to index the next page for the traversal.
*/
-unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
+unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *indexp,
int tag, unsigned int nr_pages, struct page **pages)
{
struct radix_tree_iter iter;
void **slot;
- unsigned ret = 0;
+ unsigned refs, ret = 0;

if (unlikely(!nr_pages))
return 0;

rcu_read_lock();
radix_tree_for_each_tagged(slot, &mapping->page_tree,
- &iter, *index, tag) {
- struct page *head, *page;
+ &iter, *indexp, tag) {
+ struct page *page;
+ unsigned long index = iter.index;
+ if (index < *indexp)
+ index = *indexp;
repeat:
page = radix_tree_deref_slot(slot);
if (unlikely(!page))
@@ -1526,31 +1561,41 @@ repeat:
continue;
}

- head = compound_head(page);
- if (!page_cache_get_speculative(head))
+ if (!page_cache_get_speculative(page))
goto repeat;

- /* The page was split under us? */
- if (compound_head(page) != head) {
- put_page(head);
- goto repeat;
- }
-
/* Has the page moved? */
if (unlikely(page != *slot)) {
- put_page(head);
+ put_page(page);
goto repeat;
}

+ /* For multi-order entries, find relevant subpage */
+ if (PageTransHuge(page)) {
+ VM_BUG_ON(index - page->index < 0);
+ VM_BUG_ON(index - page->index >= HPAGE_PMD_NR);
+ page += index - page->index;
+ }
+
pages[ret] = page;
if (++ret == nr_pages)
break;
+ if (!PageTransCompound(page))
+ continue;
+ for (refs = 0; ret < nr_pages &&
+ (index + 1) % HPAGE_PMD_NR;
+ ret++, refs++, index++)
+ pages[ret] = ++page;
+ if (refs)
+ page_ref_add(compound_head(page), refs);
+ if (ret == nr_pages)
+ break;
}

rcu_read_unlock();

if (ret)
- *index = pages[ret - 1]->index + 1;
+ *indexp = page_to_pgoff(pages[ret - 1]) + 1;

return ret;
}
@@ -1573,7 +1618,7 @@ unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
struct page **entries, pgoff_t *indices)
{
void **slot;
- unsigned int ret = 0;
+ unsigned int refs, ret = 0;
struct radix_tree_iter iter;

if (!nr_entries)
@@ -1582,7 +1627,10 @@ unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
rcu_read_lock();
radix_tree_for_each_tagged(slot, &mapping->page_tree,
&iter, start, tag) {
- struct page *head, *page;
+ struct page *page;
+ unsigned long index = iter.index;
+ if (index < start)
+ index = start;
repeat:
page = radix_tree_deref_slot(slot);
if (unlikely(!page))
@@ -1601,26 +1649,38 @@ repeat:
goto export;
}

- head = compound_head(page);
- if (!page_cache_get_speculative(head))
+ if (!page_cache_get_speculative(page))
goto repeat;

- /* The page was split under us? */
- if (compound_head(page) != head) {
- put_page(head);
- goto repeat;
- }
-
/* Has the page moved? */
if (unlikely(page != *slot)) {
- put_page(head);
+ put_page(page);
goto repeat;
}
+
+ /* For multi-order entries, find relevant subpage */
+ if (PageTransHuge(page)) {
+ VM_BUG_ON(index - page->index < 0);
+ VM_BUG_ON(index - page->index >= HPAGE_PMD_NR);
+ page += index - page->index;
+ }
export:
- indices[ret] = iter.index;
+ indices[ret] = index;
entries[ret] = page;
if (++ret == nr_entries)
break;
+ if (radix_tree_exception(page) || !PageTransCompound(page))
+ continue;
+ for (refs = 0; ret < nr_entries &&
+ (index + 1) % HPAGE_PMD_NR;
+ ret++, refs++) {
+ indices[ret] = ++index;
+ entries[ret] = ++page;
+ }
+ if (refs)
+ page_ref_add(compound_head(page), refs);
+ if (ret == nr_entries)
+ break;
}
rcu_read_unlock();
return ret;
@@ -2202,12 +2262,15 @@ void filemap_map_pages(struct fault_env *fe,
struct address_space *mapping = file->f_mapping;
pgoff_t last_pgoff = start_pgoff;
loff_t size;
- struct page *head, *page;
+ struct page *page;

rcu_read_lock();
radix_tree_for_each_slot(slot, &mapping->page_tree, &iter,
start_pgoff) {
- if (iter.index > end_pgoff)
+ unsigned long index = iter.index;
+ if (index < start_pgoff)
+ index = start_pgoff;
+ if (index > end_pgoff)
break;
repeat:
page = radix_tree_deref_slot(slot);
@@ -2218,25 +2281,26 @@ repeat:
slot = radix_tree_iter_retry(&iter);
continue;
}
+ page = NULL;
goto next;
}

- head = compound_head(page);
- if (!page_cache_get_speculative(head))
+ if (!page_cache_get_speculative(page))
goto repeat;

- /* The page was split under us? */
- if (compound_head(page) != head) {
- put_page(head);
- goto repeat;
- }
-
/* Has the page moved? */
if (unlikely(page != *slot)) {
- put_page(head);
+ put_page(page);
goto repeat;
}

+ /* For multi-order entries, find relevant subpage */
+ if (PageTransHuge(page)) {
+ VM_BUG_ON(index - page->index < 0);
+ VM_BUG_ON(index - page->index >= HPAGE_PMD_NR);
+ page += index - page->index;
+ }
+
if (!PageUptodate(page) ||
PageReadahead(page) ||
PageHWPoison(page))
@@ -2244,20 +2308,20 @@ repeat:
if (!trylock_page(page))
goto skip;

- if (page->mapping != mapping || !PageUptodate(page))
+ if (page_mapping(page) != mapping || !PageUptodate(page))
goto unlock;

size = round_up(i_size_read(mapping->host), PAGE_SIZE);
- if (page->index >= size >> PAGE_SHIFT)
+ if (compound_head(page)->index >= size >> PAGE_SHIFT)
goto unlock;

if (file->f_ra.mmap_miss > 0)
file->f_ra.mmap_miss--;

- fe->address += (iter.index - last_pgoff) << PAGE_SHIFT;
+ fe->address += (index - last_pgoff) << PAGE_SHIFT;
if (fe->pte)
- fe->pte += iter.index - last_pgoff;
- last_pgoff = iter.index;
+ fe->pte += index - last_pgoff;
+ last_pgoff = index;
if (alloc_set_pte(fe, NULL, page))
goto unlock;
unlock_page(page);
@@ -2270,8 +2334,14 @@ next:
/* Huge page is mapped? No need to proceed. */
if (pmd_trans_huge(*fe->pmd))
break;
- if (iter.index == end_pgoff)
+ if (index == end_pgoff)
break;
+ if (page && PageTransCompound(page) &&
+ (index & (HPAGE_PMD_NR - 1)) !=
+ HPAGE_PMD_NR - 1) {
+ index++;
+ goto repeat;
+ }
}
rcu_read_unlock();
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index a6abd76baa72..a6a25080469c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1824,6 +1824,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
struct page *head = compound_head(page);
struct zone *zone = page_zone(head);
struct lruvec *lruvec;
+ struct page *subpage;
pgoff_t end = -1;
int i;

@@ -1832,8 +1833,26 @@ static void __split_huge_page(struct page *page, struct list_head *list,
/* complete memcg works before add pages to LRU */
mem_cgroup_split_huge_fixup(head);

- if (!PageAnon(page))
- end = DIV_ROUND_UP(i_size_read(head->mapping->host), PAGE_SIZE);
+ if (!PageAnon(head)) {
+ struct address_space *mapping = head->mapping;
+ struct radix_tree_iter iter;
+ void **slot;
+
+ __dec_node_page_state(head, NR_SHMEM_THPS);
+
+ radix_tree_split(&mapping->page_tree, head->index, 0);
+ radix_tree_for_each_slot(slot, &mapping->page_tree, &iter,
+ head->index) {
+ if (iter.index >= head->index + HPAGE_PMD_NR)
+ break;
+ subpage = head + iter.index - head->index;
+ radix_tree_replace_slot(slot, subpage);
+ VM_BUG_ON_PAGE(compound_head(subpage) != head, subpage);
+ }
+ radix_tree_preload_end();
+
+ end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
+ }

for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
__split_huge_page_tail(head, i, lruvec, list);
@@ -1862,7 +1881,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
unfreeze_page(head);

for (i = 0; i < HPAGE_PMD_NR; i++) {
- struct page *subpage = head + i;
+ subpage = head + i;
if (subpage == page)
continue;
unlock_page(subpage);
@@ -2019,8 +2038,8 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
goto out;
}

- /* Addidional pins from radix tree */
- extra_pins = HPAGE_PMD_NR;
+ /* Addidional pin from radix tree */
+ extra_pins = 1;
anon_vma = NULL;
i_mmap_lock_read(mapping);
}
@@ -2042,6 +2061,12 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
if (mlocked)
lru_add_drain();

+ if (mapping && radix_tree_split_preload(HPAGE_PMD_ORDER, 0,
+ GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto unfreeze;
+ }
+
/* prevent PageLRU to go away from under us, and freeze lru stats */
spin_lock_irqsave(zone_lru_lock(page_zone(head)), flags);

@@ -2051,10 +2076,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
spin_lock(&mapping->tree_lock);
pslot = radix_tree_lookup_slot(&mapping->page_tree,
page_index(head));
- /*
- * Check if the head page is present in radix tree.
- * We assume all tail are present too, if head is there.
- */
+ /* Check if the page is present in radix tree */
if (radix_tree_deref_slot_protected(pslot,
&mapping->tree_lock) != head)
goto fail;
@@ -2069,8 +2091,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
pgdata->split_queue_len--;
list_del(page_deferred_list(head));
}
- if (mapping)
- __dec_node_page_state(page, NR_SHMEM_THPS);
spin_unlock(&pgdata->split_queue_lock);
__split_huge_page(page, list, flags);
ret = 0;
@@ -2084,9 +2104,12 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
BUG();
}
spin_unlock(&pgdata->split_queue_lock);
-fail: if (mapping)
+fail: if (mapping) {
spin_unlock(&mapping->tree_lock);
+ radix_tree_preload_end();
+ }
spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
+unfreeze:
unfreeze_page(head);
ret = -EBUSY;
}
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 79c52d0061af..9929414b170c 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1348,10 +1348,8 @@ static void collapse_shmem(struct mm_struct *mm,
break;
}
nr_none += n;
- for (; index < min(iter.index, end); index++) {
- radix_tree_insert(&mapping->page_tree, index,
- new_page + (index % HPAGE_PMD_NR));
- }
+ for (; index < min(iter.index, end); index++)
+ radix_tree_insert(&mapping->page_tree, index, new_page);

/* We are done. */
if (index >= end)
@@ -1420,8 +1418,7 @@ static void collapse_shmem(struct mm_struct *mm,
list_add_tail(&page->lru, &pagelist);

/* Finally, replace with the new page. */
- radix_tree_replace_slot(slot,
- new_page + (index % HPAGE_PMD_NR));
+ radix_tree_replace_slot(slot, new_page);

index++;
continue;
@@ -1438,24 +1435,17 @@ out_unlock:
break;
}

- /*
- * Handle hole in radix tree at the end of the range.
- * This code only triggers if there's nothing in radix tree
- * beyond 'end'.
- */
- if (result == SCAN_SUCCEED && index < end) {
+ if (result == SCAN_SUCCEED) {
int n = end - index;

- if (!shmem_charge(mapping->host, n)) {
+ if (n && !shmem_charge(mapping->host, n)) {
result = SCAN_FAIL;
goto tree_locked;
}
-
- for (; index < end; index++) {
- radix_tree_insert(&mapping->page_tree, index,
- new_page + (index % HPAGE_PMD_NR));
- }
nr_none += n;
+
+ radix_tree_join(&mapping->page_tree, start,
+ HPAGE_PMD_ORDER, new_page);
}

tree_locked:
diff --git a/mm/shmem.c b/mm/shmem.c
index ac3c35665ad7..ad47b131b2ef 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -540,33 +540,14 @@ static int shmem_add_to_page_cache(struct page *page,
VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
VM_BUG_ON(expected && PageTransHuge(page));

- page_ref_add(page, nr);
+ get_page(page);
page->mapping = mapping;
page->index = index;

spin_lock_irq(&mapping->tree_lock);
- if (PageTransHuge(page)) {
- void __rcu **results;
- pgoff_t idx;
- int i;
-
- error = 0;
- if (radix_tree_gang_lookup_slot(&mapping->page_tree,
- &results, &idx, index, 1) &&
- idx < index + HPAGE_PMD_NR) {
- error = -EEXIST;
- }
-
- if (!error) {
- for (i = 0; i < HPAGE_PMD_NR; i++) {
- error = radix_tree_insert(&mapping->page_tree,
- index + i, page + i);
- VM_BUG_ON(error);
- }
- count_vm_event(THP_FILE_ALLOC);
- }
- } else if (!expected) {
- error = radix_tree_insert(&mapping->page_tree, index, page);
+ if (!expected) {
+ error = __radix_tree_insert(&mapping->page_tree, index,
+ compound_order(page), page);
} else {
error = shmem_radix_tree_replace(mapping, index, expected,
page);
@@ -574,15 +555,17 @@ static int shmem_add_to_page_cache(struct page *page,

if (!error) {
mapping->nrpages += nr;
- if (PageTransHuge(page))
+ if (PageTransHuge(page)) {
+ count_vm_event(THP_FILE_ALLOC);
__inc_node_page_state(page, NR_SHMEM_THPS);
+ }
__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
__mod_node_page_state(page_pgdat(page), NR_SHMEM, nr);
spin_unlock_irq(&mapping->tree_lock);
} else {
page->mapping = NULL;
spin_unlock_irq(&mapping->tree_lock);
- page_ref_sub(page, nr);
+ put_page(page);
}
return error;
}
@@ -1734,8 +1717,7 @@ alloc_nohuge: page = shmem_alloc_and_acct_page(gfp, info, sbinfo,
PageTransHuge(page));
if (error)
goto unacct;
- error = radix_tree_maybe_preload_order(gfp & GFP_RECLAIM_MASK,
- compound_order(page));
+ error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
if (!error) {
error = shmem_add_to_page_cache(page, mapping, hindex,
NULL);
--
2.9.3