Re: [patch 8/8] mm: make per-memcg lru lists exclusive
From: Hiroyuki Kamezawa
Date: Thu Jun 02 2011 - 09:17:09 EST
2011/6/1 Johannes Weiner <hannes@xxxxxxxxxxx>:
> All lru list walkers have been converted to operate on per-memcg
> lists, the global per-zone lists are no longer required.
>
> This patch makes the per-memcg lists exclusive and removes the global
> lists from memcg-enabled kernels.
>
> The per-memcg lists now string up page descriptors directly, which
> unifies/simplifies the list isolation code of page reclaim and saves
> a full doubly-linked list head for each page in the system.
>
> At the core of this change is the introduction of the lruvec
> structure, an array of all lru list heads. It exists for each zone
> globally, and for each zone per memcg. All lru list operations are
> now done in generic code against lruvecs, with the memcg lru list
> primitives only doing accounting and returning the proper lruvec for
> the currently scanned memcg on isolation, or for the respective page
> on putback.
>
> Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx>
Could you divide this into three parts:
- introduce lruvec
- stop recording the section/node information in pc->flags, since we
  now see the "page" directly on the memcg LRU and there is no longer
  any need to get the page from the "pc"
- remove pc->lru completely
?
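
Just to confirm I'm reading the new calling convention correctly (it
matters for how such a split would be staged), here is a condensed
sketch of the add/del pair as I understand it from the mm_inline.h
hunks below (not the literal code):

struct lruvec {
	struct list_head lists[NR_LRU_LISTS];
};

static inline void
add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
{
	/* memcg side accounts the page and picks the right lruvec... */
	struct lruvec *lruvec = mem_cgroup_lru_add_list(zone, page, l);

	/* ...while generic code links the page descriptor itself */
	list_add(&page->lru, &lruvec->lists[l]);
	__mod_zone_page_state(zone, NR_LRU_BASE + l, hpage_nr_pages(page));
}

static inline void
del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
{
	/* accounting first, then the physical unlink */
	mem_cgroup_lru_del_list(page, l);
	list_del(&page->lru);
	__mod_zone_page_state(zone, NR_LRU_BASE + l, -hpage_nr_pages(page));
}

If that is what you intend, each of the three parts above should be a
fairly mechanical conversion.
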
Thanks,
-Kame
> ---
> include/linux/memcontrol.h | 53 ++++-----
> include/linux/mm_inline.h | 14 ++-
> include/linux/mmzone.h | 10 +-
> include/linux/page_cgroup.h | 36 ------
> mm/memcontrol.c | 271 ++++++++++++++++++-------------------------
> mm/page_alloc.c | 2 +-
> mm/page_cgroup.c | 38 +------
> mm/swap.c | 20 ++--
> mm/vmscan.c | 88 ++++++--------
> 9 files changed, 207 insertions(+), 325 deletions(-)
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 56c1def..d3837f0 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -20,6 +20,7 @@
> #ifndef _LINUX_MEMCONTROL_H
> #define _LINUX_MEMCONTROL_H
> #include <linux/cgroup.h>
> +#include <linux/mmzone.h>
> struct mem_cgroup;
> struct page_cgroup;
> struct page;
> @@ -30,13 +31,6 @@ enum mem_cgroup_page_stat_item {
> MEMCG_NR_FILE_MAPPED, /* # of pages charged as file rss */
> };
>
> -extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
> - struct list_head *dst,
> - unsigned long *scanned, int order,
> - int mode, struct zone *z,
> - struct mem_cgroup *mem_cont,
> - int active, int file);
> -
> #ifdef CONFIG_CGROUP_MEM_RES_CTLR
> /*
> * All "charge" functions with gfp_mask should use GFP_KERNEL or
> @@ -60,15 +54,14 @@ extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr);
>
> extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
> gfp_t gfp_mask);
> -struct page *mem_cgroup_lru_to_page(struct zone *, struct mem_cgroup *,
> - enum lru_list);
> -extern void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru);
> -extern void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru);
> -extern void mem_cgroup_rotate_reclaimable_page(struct page *page);
> -extern void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru);
> -extern void mem_cgroup_del_lru(struct page *page);
> -extern void mem_cgroup_move_lists(struct page *page,
> - enum lru_list from, enum lru_list to);
> +
> +struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
> +struct lruvec *mem_cgroup_lru_add_list(struct zone *, struct page *,
> + enum lru_list);
> +void mem_cgroup_lru_del_list(struct page *, enum lru_list);
> +void mem_cgroup_lru_del(struct page *);
> +struct lruvec *mem_cgroup_lru_move_lists(struct zone *, struct page *,
> + enum lru_list, enum lru_list);
>
> /* For coalescing uncharge for reducing memcg' overhead*/
> extern void mem_cgroup_uncharge_start(void);
> @@ -214,33 +207,33 @@ static inline int mem_cgroup_shmem_charge_fallback(struct page *page,
> return 0;
> }
>
> -static inline void mem_cgroup_add_lru_list(struct page *page, int lru)
> -{
> -}
> -
> -static inline void mem_cgroup_del_lru_list(struct page *page, int lru)
> +static inline struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
> + struct mem_cgroup *mem)
> {
> - return ;
> + return &zone->lruvec;
> }
>
> -static inline void mem_cgroup_rotate_reclaimable_page(struct page *page)
> +static inline struct lruvec *mem_cgroup_lru_add_list(struct zone *zone,
> + struct page *page,
> + enum lru_list lru)
> {
> - return ;
> + return &zone->lruvec;
> }
>
> -static inline void mem_cgroup_rotate_lru_list(struct page *page, int lru)
> +static inline void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru)
> {
> - return ;
> }
>
> -static inline void mem_cgroup_del_lru(struct page *page)
> +static inline void mem_cgroup_lru_del(struct page *page)
> {
> - return ;
> }
>
> -static inline void
> -mem_cgroup_move_lists(struct page *page, enum lru_list from, enum lru_list to)
> +static inline struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone,
> + struct page *page,
> + enum lru_list from,
> + enum lru_list to)
> {
> + return &zone->lruvec;
> }
>
> static inline struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
> diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
> index 8f7d247..43d5d9f 100644
> --- a/include/linux/mm_inline.h
> +++ b/include/linux/mm_inline.h
> @@ -25,23 +25,27 @@ static inline void
> __add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l,
> struct list_head *head)
> {
> + /* NOTE: Caller must ensure @head is on the right lruvec! */
> + mem_cgroup_lru_add_list(zone, page, l);
> list_add(&page->lru, head);
> __mod_zone_page_state(zone, NR_LRU_BASE + l, hpage_nr_pages(page));
> - mem_cgroup_add_lru_list(page, l);
> }
>
> static inline void
> add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
> {
> - __add_page_to_lru_list(zone, page, l, &zone->lru[l].list);
> + struct lruvec *lruvec = mem_cgroup_lru_add_list(zone, page, l);
> +
> + list_add(&page->lru, &lruvec->lists[l]);
> + __mod_zone_page_state(zone, NR_LRU_BASE + l, hpage_nr_pages(page));
> }
>
> static inline void
> del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
> {
> + mem_cgroup_lru_del_list(page, l);
> list_del(&page->lru);
> __mod_zone_page_state(zone, NR_LRU_BASE + l, -hpage_nr_pages(page));
> - mem_cgroup_del_lru_list(page, l);
> }
>
> /**
> @@ -64,7 +68,6 @@ del_page_from_lru(struct zone *zone, struct page *page)
> {
> enum lru_list l;
>
> - list_del(&page->lru);
> if (PageUnevictable(page)) {
> __ClearPageUnevictable(page);
> l = LRU_UNEVICTABLE;
> @@ -75,8 +78,9 @@ del_page_from_lru(struct zone *zone, struct page *page)
> l += LRU_ACTIVE;
> }
> }
> + mem_cgroup_lru_del_list(page, l);
> + list_del(&page->lru);
> __mod_zone_page_state(zone, NR_LRU_BASE + l, -hpage_nr_pages(page));
> - mem_cgroup_del_lru_list(page, l);
> }
>
> /**
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index e56f835..c2ddce5 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -158,6 +158,10 @@ static inline int is_unevictable_lru(enum lru_list l)
> return (l == LRU_UNEVICTABLE);
> }
>
> +struct lruvec {
> + struct list_head lists[NR_LRU_LISTS];
> +};
> +
> enum zone_watermarks {
> WMARK_MIN,
> WMARK_LOW,
> @@ -344,10 +348,8 @@ struct zone {
> ZONE_PADDING(_pad1_)
>
> /* Fields commonly accessed by the page reclaim scanner */
> - spinlock_t lru_lock;
> - struct zone_lru {
> - struct list_head list;
> - } lru[NR_LRU_LISTS];
> + spinlock_t lru_lock;
> + struct lruvec lruvec;
>
> struct zone_reclaim_stat reclaim_stat;
>
> diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
> index 961ecc7..a42ddf9 100644
> --- a/include/linux/page_cgroup.h
> +++ b/include/linux/page_cgroup.h
> @@ -31,7 +31,6 @@ enum {
> struct page_cgroup {
> unsigned long flags;
> struct mem_cgroup *mem_cgroup;
> - struct list_head lru; /* per cgroup LRU list */
> };
>
> void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat);
> @@ -49,7 +48,6 @@ static inline void __init page_cgroup_init(void)
> #endif
>
> struct page_cgroup *lookup_page_cgroup(struct page *page);
> -struct page *lookup_cgroup_page(struct page_cgroup *pc);
>
> #define TESTPCGFLAG(uname, lname) \
> static inline int PageCgroup##uname(struct page_cgroup *pc) \
> @@ -121,40 +119,6 @@ static inline void move_unlock_page_cgroup(struct page_cgroup *pc,
> bit_spin_unlock(PCG_MOVE_LOCK, &pc->flags);
> local_irq_restore(*flags);
> }
> -
> -#ifdef CONFIG_SPARSEMEM
> -#define PCG_ARRAYID_WIDTH SECTIONS_SHIFT
> -#else
> -#define PCG_ARRAYID_WIDTH NODES_SHIFT
> -#endif
> -
> -#if (PCG_ARRAYID_WIDTH > BITS_PER_LONG - NR_PCG_FLAGS)
> -#error Not enough space left in pc->flags to store page_cgroup array IDs
> -#endif
> -
> -/* pc->flags: ARRAY-ID | FLAGS */
> -
> -#define PCG_ARRAYID_MASK ((1UL << PCG_ARRAYID_WIDTH) - 1)
> -
> -#define PCG_ARRAYID_OFFSET (BITS_PER_LONG - PCG_ARRAYID_WIDTH)
> -/*
> - * Zero the shift count for non-existent fields, to prevent compiler
> - * warnings and ensure references are optimized away.
> - */
> -#define PCG_ARRAYID_SHIFT (PCG_ARRAYID_OFFSET * (PCG_ARRAYID_WIDTH != 0))
> -
> -static inline void set_page_cgroup_array_id(struct page_cgroup *pc,
> - unsigned long id)
> -{
> - pc->flags &= ~(PCG_ARRAYID_MASK << PCG_ARRAYID_SHIFT);
> - pc->flags |= (id & PCG_ARRAYID_MASK) << PCG_ARRAYID_SHIFT;
> -}
> -
> -static inline unsigned long page_cgroup_array_id(struct page_cgroup *pc)
> -{
> - return (pc->flags >> PCG_ARRAYID_SHIFT) & PCG_ARRAYID_MASK;
> -}
> -
> #else /* CONFIG_CGROUP_MEM_RES_CTLR */
> struct page_cgroup;
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index d9d1a7e..4a365b7 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -133,10 +133,7 @@ struct mem_cgroup_stat_cpu {
> * per-zone information in memory controller.
> */
> struct mem_cgroup_per_zone {
> - /*
> - * spin_lock to protect the per cgroup LRU
> - */
> - struct list_head lists[NR_LRU_LISTS];
> + struct lruvec lruvec;
> unsigned long count[NR_LRU_LISTS];
>
> struct zone_reclaim_stat reclaim_stat;
> @@ -642,6 +639,26 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *mem)
> return (mem == root_mem_cgroup);
> }
>
> +/**
> + * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg
> + * @zone: zone of the wanted lruvec
> + * @mem: memcg of the wanted lruvec
> + *
> + * Returns the lru list vector holding pages for the given @zone and
> + * @mem. This can be the global zone lruvec, if the memory controller
> + * is disabled.
> + */
> +struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone, struct mem_cgroup *mem)
> +{
> + struct mem_cgroup_per_zone *mz;
> +
> + if (mem_cgroup_disabled())
> + return &zone->lruvec;
> +
> + mz = mem_cgroup_zoneinfo(mem, zone_to_nid(zone), zone_idx(zone));
> + return &mz->lruvec;
> +}
> +
> /*
> * Following LRU functions are allowed to be used without PCG_LOCK.
> * Operations are called by routine of global LRU independently from memcg.
> @@ -656,21 +673,74 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *mem)
> * When moving account, the page is not on LRU. It's isolated.
> */
>
> -struct page *mem_cgroup_lru_to_page(struct zone *zone, struct mem_cgroup *mem,
> - enum lru_list lru)
> +/**
> + * mem_cgroup_lru_add_list - account for adding an lru page and return lruvec
> + * @zone: zone of the page
> + * @page: the page itself
> + * @lru: target lru list
> + *
> + * This function must be called when a page is to be added to an lru
> + * list.
> + *
> + * Returns the lruvec to hold @page, the callsite is responsible for
> + * physically linking the page to &lruvec->lists[@lru].
> + */
> +struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page,
> + enum lru_list lru)
> {
> struct mem_cgroup_per_zone *mz;
> struct page_cgroup *pc;
> + struct mem_cgroup *mem;
>
> - mz = mem_cgroup_zoneinfo(mem, zone_to_nid(zone), zone_idx(zone));
> - pc = list_entry(mz->lists[lru].prev, struct page_cgroup, lru);
> - return lookup_cgroup_page(pc);
> + if (mem_cgroup_disabled())
> + return &zone->lruvec;
> +
> + pc = lookup_page_cgroup(page);
> + VM_BUG_ON(PageCgroupAcctLRU(pc));
> + if (PageCgroupUsed(pc)) {
> + /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
> + smp_rmb();
> + mem = pc->mem_cgroup;
> + } else {
> + /*
> + * If the page is no longer charged, add it to the
> + * root memcg's lru. Either it will be freed soon, or
> + * it will get charged again and the charger will
> + * relink it to the right list.
> + */
> + mem = root_mem_cgroup;
> + }
> + mz = page_cgroup_zoneinfo(mem, page);
> + /*
> + * We do not account for uncharged pages: they are linked to
> + * root_mem_cgroup but when the page is unlinked upon free,
> + * accounting would be done against pc->mem_cgroup.
> + */
> + if (PageCgroupUsed(pc)) {
> + /*
> + * Huge page splitting is serialized through the lru
> + * lock, so compound_order() is stable here.
> + */
> + MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page);
> + SetPageCgroupAcctLRU(pc);
> + }
> + return &mz->lruvec;
> }
>
> -void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru)
> +/**
> + * mem_cgroup_lru_del_list - account for removing an lru page
> + * @page: page to unlink
> + * @lru: lru list the page is sitting on
> + *
> + * This function must be called when a page is to be removed from an
> + * lru list.
> + *
> + * The callsite is responsible for physically unlinking &@page->lru.
> + */
> +void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru)
> {
> - struct page_cgroup *pc;
> struct mem_cgroup_per_zone *mz;
> + struct page_cgroup *pc;
>
> if (mem_cgroup_disabled())
> return;
> @@ -686,75 +756,35 @@ void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru)
> mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
> /* huge page split is done under lru_lock. so, we have no races. */
> MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page);
> - VM_BUG_ON(list_empty(&pc->lru));
> - list_del_init(&pc->lru);
> }
>
> -void mem_cgroup_del_lru(struct page *page)
> +void mem_cgroup_lru_del(struct page *page)
> {
> - mem_cgroup_del_lru_list(page, page_lru(page));
> + mem_cgroup_lru_del_list(page, page_lru(page));
> }
>
> -/*
> - * Writeback is about to end against a page which has been marked for immediate
> - * reclaim. If it still appears to be reclaimable, move it to the tail of the
> - * inactive list.
> +/**
> + * mem_cgroup_lru_move_lists - account for moving a page between lru lists
> + * @zone: zone of the page
> + * @page: page to move
> + * @from: current lru list
> + * @to: new lru list
> + *
> + * This function must be called when a page is moved between lru
> + * lists, or rotated on the same lru list.
> + *
> + * Returns the lruvec to hold @page in the future, the callsite is
> + * responsible for physically relinking the page to
> + * &lruvec->lists[@to].
> */
> -void mem_cgroup_rotate_reclaimable_page(struct page *page)
> -{
> - struct mem_cgroup_per_zone *mz;
> - struct page_cgroup *pc;
> - enum lru_list lru = page_lru(page);
> -
> - if (mem_cgroup_disabled())
> - return;
> -
> - pc = lookup_page_cgroup(page);
> - /* unused page is not rotated. */
> - if (!PageCgroupUsed(pc))
> - return;
> - /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
> - smp_rmb();
> - mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
> - list_move_tail(&pc->lru, &mz->lists[lru]);
> -}
> -
> -void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
> +struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone,
> + struct page *page,
> + enum lru_list from,
> + enum lru_list to)
> {
> - struct mem_cgroup_per_zone *mz;
> - struct page_cgroup *pc;
> -
> - if (mem_cgroup_disabled())
> - return;
> -
> - pc = lookup_page_cgroup(page);
> - /* unused page is not rotated. */
> - if (!PageCgroupUsed(pc))
> - return;
> - /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
> - smp_rmb();
> - mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
> - list_move(&pc->lru, &mz->lists[lru]);
> -}
> -
> -void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
> -{
> - struct page_cgroup *pc;
> - struct mem_cgroup_per_zone *mz;
> -
> - if (mem_cgroup_disabled())
> - return;
> - pc = lookup_page_cgroup(page);
> - VM_BUG_ON(PageCgroupAcctLRU(pc));
> - if (!PageCgroupUsed(pc))
> - return;
> - /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
> - smp_rmb();
> - mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
> - /* huge page split is done under lru_lock. so, we have no races. */
> - MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page);
> - SetPageCgroupAcctLRU(pc);
> - list_add(&pc->lru, &mz->lists[lru]);
> + /* TODO: this could be optimized, especially if from == to */
> + mem_cgroup_lru_del_list(page, from);
> + return mem_cgroup_lru_add_list(zone, page, to);
> }
>
> /*
> @@ -786,7 +816,7 @@ static void mem_cgroup_lru_del_before_commit(struct page *page)
> * is guarded by lock_page() because the page is SwapCache.
> */
> if (!PageCgroupUsed(pc))
> - mem_cgroup_del_lru_list(page, page_lru(page));
> + del_page_from_lru(zone, page);
> spin_unlock_irqrestore(&zone->lru_lock, flags);
> }
>
> @@ -800,22 +830,11 @@ static void mem_cgroup_lru_add_after_commit(struct page *page)
> if (likely(!PageLRU(page)))
> return;
> spin_lock_irqsave(&zone->lru_lock, flags);
> - /* link when the page is linked to LRU but page_cgroup isn't */
> if (PageLRU(page) && !PageCgroupAcctLRU(pc))
> - mem_cgroup_add_lru_list(page, page_lru(page));
> + add_page_to_lru_list(zone, page, page_lru(page));
> spin_unlock_irqrestore(&zone->lru_lock, flags);
> }
>
> -
> -void mem_cgroup_move_lists(struct page *page,
> - enum lru_list from, enum lru_list to)
> -{
> - if (mem_cgroup_disabled())
> - return;
> - mem_cgroup_del_lru_list(page, from);
> - mem_cgroup_add_lru_list(page, to);
> -}
> -
> int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
> {
> int ret;
> @@ -935,67 +954,6 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page)
> return &mz->reclaim_stat;
> }
>
> -unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
> - struct list_head *dst,
> - unsigned long *scanned, int order,
> - int mode, struct zone *z,
> - struct mem_cgroup *mem_cont,
> - int active, int file)
> -{
> - unsigned long nr_taken = 0;
> - struct page *page;
> - unsigned long scan;
> - LIST_HEAD(pc_list);
> - struct list_head *src;
> - struct page_cgroup *pc, *tmp;
> - int nid = zone_to_nid(z);
> - int zid = zone_idx(z);
> - struct mem_cgroup_per_zone *mz;
> - int lru = LRU_FILE * file + active;
> - int ret;
> -
> - BUG_ON(!mem_cont);
> - mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
> - src = &mz->lists[lru];
> -
> - scan = 0;
> - list_for_each_entry_safe_reverse(pc, tmp, src, lru) {
> - if (scan >= nr_to_scan)
> - break;
> -
> - if (unlikely(!PageCgroupUsed(pc)))
> - continue;
> -
> - page = lookup_cgroup_page(pc);
> -
> - if (unlikely(!PageLRU(page)))
> - continue;
> -
> - scan++;
> - ret = __isolate_lru_page(page, mode, file);
> - switch (ret) {
> - case 0:
> - list_move(&page->lru, dst);
> - mem_cgroup_del_lru(page);
> - nr_taken += hpage_nr_pages(page);
> - break;
> - case -EBUSY:
> - /* we don't affect global LRU but rotate in our LRU */
> - mem_cgroup_rotate_lru_list(page, page_lru(page));
> - break;
> - default:
> - break;
> - }
> - }
> -
> - *scanned = scan;
> -
> - trace_mm_vmscan_memcg_isolate(0, nr_to_scan, scan, nr_taken,
> - 0, 0, 0, mode);
> -
> - return nr_taken;
> -}
> -
> #define mem_cgroup_from_res_counter(counter, member) \
> container_of(counter, struct mem_cgroup, member)
>
> @@ -3110,22 +3068,23 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
> static int mem_cgroup_force_empty_list(struct mem_cgroup *mem,
> int node, int zid, enum lru_list lru)
> {
> - struct zone *zone;
> struct mem_cgroup_per_zone *mz;
> - struct page_cgroup *pc, *busy;
> unsigned long flags, loop;
> struct list_head *list;
> + struct page *busy;
> + struct zone *zone;
> int ret = 0;
>
> zone = &NODE_DATA(node)->node_zones[zid];
> mz = mem_cgroup_zoneinfo(mem, node, zid);
> - list = &mz->lists[lru];
> + list = &mz->lruvec.lists[lru];
>
> loop = MEM_CGROUP_ZSTAT(mz, lru);
> /* give some margin against EBUSY etc...*/
> loop += 256;
> busy = NULL;
> while (loop--) {
> + struct page_cgroup *pc;
> struct page *page;
>
> ret = 0;
> @@ -3134,16 +3093,16 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem,
> spin_unlock_irqrestore(&zone->lru_lock, flags);
> break;
> }
> - pc = list_entry(list->prev, struct page_cgroup, lru);
> - if (busy == pc) {
> - list_move(&pc->lru, list);
> + page = list_entry(list->prev, struct page, lru);
> + if (busy == page) {
> + list_move(&page->lru, list);
> busy = NULL;
> spin_unlock_irqrestore(&zone->lru_lock, flags);
> continue;
> }
> spin_unlock_irqrestore(&zone->lru_lock, flags);
>
> - page = lookup_cgroup_page(pc);
> + pc = lookup_page_cgroup(page);
>
> ret = mem_cgroup_move_parent(page, pc, mem, GFP_KERNEL);
> if (ret == -ENOMEM)
> @@ -3151,7 +3110,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem,
>
> if (ret == -EBUSY || ret == -EINVAL) {
> /* found lock contention or "pc" is obsolete. */
> - busy = pc;
> + busy = page;
> cond_resched();
> } else
> busy = NULL;
> @@ -4171,7 +4130,7 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
> for (zone = 0; zone < MAX_NR_ZONES; zone++) {
> mz = &pn->zoneinfo[zone];
> for_each_lru(l)
> - INIT_LIST_HEAD(&mz->lists[l]);
> + INIT_LIST_HEAD(&mz->lruvec.lists[l]);
> }
> return 0;
> }
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 3f8bce2..9da238d 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -4289,7 +4289,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
>
> zone_pcp_init(zone);
> for_each_lru(l) {
> - INIT_LIST_HEAD(&zone->lru[l].list);
> + INIT_LIST_HEAD(&zone->lruvec.lists[l]);
> zone->reclaim_stat.nr_saved_scan[l] = 0;
> }
> zone->reclaim_stat.recent_rotated[0] = 0;
> diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
> index 2daadc3..916c6f9 100644
> --- a/mm/page_cgroup.c
> +++ b/mm/page_cgroup.c
> @@ -11,12 +11,10 @@
> #include <linux/swapops.h>
> #include <linux/kmemleak.h>
>
> -static void __meminit init_page_cgroup(struct page_cgroup *pc, unsigned long id)
> +static void __meminit init_page_cgroup(struct page_cgroup *pc)
> {
> pc->flags = 0;
> - set_page_cgroup_array_id(pc, id);
> pc->mem_cgroup = NULL;
> - INIT_LIST_HEAD(&pc->lru);
> }
> static unsigned long total_usage;
>
> @@ -42,19 +40,6 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
> return base + offset;
> }
>
> -struct page *lookup_cgroup_page(struct page_cgroup *pc)
> -{
> - unsigned long pfn;
> - struct page *page;
> - pg_data_t *pgdat;
> -
> - pgdat = NODE_DATA(page_cgroup_array_id(pc));
> - pfn = pc - pgdat->node_page_cgroup + pgdat->node_start_pfn;
> - page = pfn_to_page(pfn);
> - VM_BUG_ON(pc != lookup_page_cgroup(page));
> - return page;
> -}
> -
> static int __init alloc_node_page_cgroup(int nid)
> {
> struct page_cgroup *base, *pc;
> @@ -75,7 +60,7 @@ static int __init alloc_node_page_cgroup(int nid)
> return -ENOMEM;
> for (index = 0; index < nr_pages; index++) {
> pc = base + index;
> - init_page_cgroup(pc, nid);
> + init_page_cgroup(pc);
> }
> NODE_DATA(nid)->node_page_cgroup = base;
> total_usage += table_size;
> @@ -117,19 +102,6 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
> return section->page_cgroup + pfn;
> }
>
> -struct page *lookup_cgroup_page(struct page_cgroup *pc)
> -{
> - struct mem_section *section;
> - struct page *page;
> - unsigned long nr;
> -
> - nr = page_cgroup_array_id(pc);
> - section = __nr_to_section(nr);
> - page = pfn_to_page(pc - section->page_cgroup);
> - VM_BUG_ON(pc != lookup_page_cgroup(page));
> - return page;
> -}
> -
> static void *__init_refok alloc_page_cgroup(size_t size, int nid)
> {
> void *addr = NULL;
> @@ -167,11 +139,9 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn)
> struct page_cgroup *base, *pc;
> struct mem_section *section;
> unsigned long table_size;
> - unsigned long nr;
> int nid, index;
>
> - nr = pfn_to_section_nr(pfn);
> - section = __nr_to_section(nr);
> + section = __pfn_to_section(pfn);
>
> if (section->page_cgroup)
> return 0;
> @@ -194,7 +164,7 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn)
>
> for (index = 0; index < PAGES_PER_SECTION; index++) {
> pc = base + index;
> - init_page_cgroup(pc, nr);
> + init_page_cgroup(pc);
> }
>
> section->page_cgroup = base - pfn;
> diff --git a/mm/swap.c b/mm/swap.c
> index 5602f1a..0a5a93b 100644
> --- a/mm/swap.c
> +++ b/mm/swap.c
> @@ -209,12 +209,14 @@ static void pagevec_lru_move_fn(struct pagevec *pvec,
> static void pagevec_move_tail_fn(struct page *page, void *arg)
> {
> int *pgmoved = arg;
> - struct zone *zone = page_zone(page);
>
> if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
> enum lru_list lru = page_lru_base_type(page);
> - list_move_tail(&page->lru, &zone->lru[lru].list);
> - mem_cgroup_rotate_reclaimable_page(page);
> + struct lruvec *lruvec;
> +
> + lruvec = mem_cgroup_lru_move_lists(page_zone(page),
> + page, lru, lru);
> + list_move_tail(&page->lru, &lruvec->lists[lru]);
> (*pgmoved)++;
> }
> }
> @@ -420,12 +422,13 @@ static void lru_deactivate_fn(struct page *page, void *arg)
> */
> SetPageReclaim(page);
> } else {
> + struct lruvec *lruvec;
> /*
> * The page's writeback ends up during pagevec
> * We moves tha page into tail of inactive.
> */
> - list_move_tail(&page->lru, &zone->lru[lru].list);
> - mem_cgroup_rotate_reclaimable_page(page);
> + lruvec = mem_cgroup_lru_move_lists(zone, page, lru, lru);
> + list_move_tail(&page->lru, &lruvec->lists[lru]);
> __count_vm_event(PGROTATED);
> }
>
> @@ -597,7 +600,6 @@ void lru_add_page_tail(struct zone* zone,
> int active;
> enum lru_list lru;
> const int file = 0;
> - struct list_head *head;
>
> VM_BUG_ON(!PageHead(page));
> VM_BUG_ON(PageCompound(page_tail));
> @@ -617,10 +619,10 @@ void lru_add_page_tail(struct zone* zone,
> }
> update_page_reclaim_stat(zone, page_tail, file, active);
> if (likely(PageLRU(page)))
> - head = page->lru.prev;
> + __add_page_to_lru_list(zone, page_tail, lru,
> + page->lru.prev);
> else
> - head = &zone->lru[lru].list;
> - __add_page_to_lru_list(zone, page_tail, lru, head);
> + add_page_to_lru_list(zone, page_tail, lru);
> } else {
> SetPageUnevictable(page_tail);
> add_page_to_lru_list(zone, page_tail, LRU_UNEVICTABLE);
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 23fd2b1..87e1fcb 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -1080,15 +1080,14 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
>
> switch (__isolate_lru_page(page, mode, file)) {
> case 0:
> + mem_cgroup_lru_del(page);
> list_move(&page->lru, dst);
> - mem_cgroup_del_lru(page);
> nr_taken += hpage_nr_pages(page);
> break;
>
> case -EBUSY:
> /* else it is being freed elsewhere */
> list_move(&page->lru, src);
> - mem_cgroup_rotate_lru_list(page, page_lru(page));
> continue;
>
> default:
> @@ -1138,8 +1137,8 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
> break;
>
> if (__isolate_lru_page(cursor_page, mode, file) == 0) {
> + mem_cgroup_lru_del(cursor_page);
> list_move(&cursor_page->lru, dst);
> - mem_cgroup_del_lru(cursor_page);
> nr_taken += hpage_nr_pages(page);
> nr_lumpy_taken++;
> if (PageDirty(cursor_page))
> @@ -1168,19 +1167,22 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
> return nr_taken;
> }
>
> -static unsigned long isolate_pages_global(unsigned long nr,
> - struct list_head *dst,
> - unsigned long *scanned, int order,
> - int mode, struct zone *z,
> - int active, int file)
> +static unsigned long isolate_pages(unsigned long nr,
> + struct list_head *dst,
> + unsigned long *scanned, int order,
> + int mode, struct zone *z,
> + int active, int file,
> + struct mem_cgroup *mem)
> {
> + struct lruvec *lruvec = mem_cgroup_zone_lruvec(z, mem);
> int lru = LRU_BASE;
> +
> if (active)
> lru += LRU_ACTIVE;
> if (file)
> lru += LRU_FILE;
> - return isolate_lru_pages(nr, &z->lru[lru].list, dst, scanned, order,
> - mode, file);
> + return isolate_lru_pages(nr, &lruvec->lists[lru], dst,
> + scanned, order, mode, file);
> }
>
> /*
> @@ -1428,20 +1430,11 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
> lru_add_drain();
> spin_lock_irq(&zone->lru_lock);
>
> - if (scanning_global_lru(sc)) {
> - nr_taken = isolate_pages_global(nr_to_scan,
> - &page_list, &nr_scanned, sc->order,
> - sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM ?
> - ISOLATE_BOTH : ISOLATE_INACTIVE,
> - zone, 0, file);
> - } else {
> - nr_taken = mem_cgroup_isolate_pages(nr_to_scan,
> - &page_list, &nr_scanned, sc->order,
> - sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM ?
> + nr_taken = isolate_pages(nr_to_scan,
> + &page_list, &nr_scanned, sc->order,
> + sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM ?
> ISOLATE_BOTH : ISOLATE_INACTIVE,
> - zone, sc->mem_cgroup,
> - 0, file);
> - }
> + zone, 0, file, sc->mem_cgroup);
>
> if (global_reclaim(sc)) {
> zone->pages_scanned += nr_scanned;
> @@ -1514,13 +1507,15 @@ static void move_active_pages_to_lru(struct zone *zone,
> pagevec_init(&pvec, 1);
>
> while (!list_empty(list)) {
> + struct lruvec *lruvec;
> +
> page = lru_to_page(list);
>
> VM_BUG_ON(PageLRU(page));
> SetPageLRU(page);
>
> - list_move(&page->lru, &zone->lru[lru].list);
> - mem_cgroup_add_lru_list(page, lru);
> + lruvec = mem_cgroup_lru_add_list(zone, page, lru);
> + list_move(&page->lru, &lruvec->lists[lru]);
> pgmoved += hpage_nr_pages(page);
>
> if (!pagevec_add(&pvec, page) || list_empty(list)) {
> @@ -1551,17 +1546,10 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
>
> lru_add_drain();
> spin_lock_irq(&zone->lru_lock);
> - if (scanning_global_lru(sc)) {
> - nr_taken = isolate_pages_global(nr_pages, &l_hold,
> - &pgscanned, sc->order,
> - ISOLATE_ACTIVE, zone,
> - 1, file);
> - } else {
> - nr_taken = mem_cgroup_isolate_pages(nr_pages, &l_hold,
> - &pgscanned, sc->order,
> - ISOLATE_ACTIVE, zone,
> - sc->mem_cgroup, 1, file);
> - }
> + nr_taken = isolate_pages(nr_pages, &l_hold,
> + &pgscanned, sc->order,
> + ISOLATE_ACTIVE, zone,
> + 1, file, sc->mem_cgroup);
>
> if (global_reclaim(sc))
> zone->pages_scanned += pgscanned;
> @@ -3154,16 +3142,18 @@ int page_evictable(struct page *page, struct vm_area_struct *vma)
> */
> static void check_move_unevictable_page(struct page *page, struct zone *zone)
> {
> - VM_BUG_ON(PageActive(page));
> + struct lruvec *lruvec;
>
> + VM_BUG_ON(PageActive(page));
> retry:
> ClearPageUnevictable(page);
> if (page_evictable(page, NULL)) {
> enum lru_list l = page_lru_base_type(page);
>
> + lruvec = mem_cgroup_lru_move_lists(zone, page,
> + LRU_UNEVICTABLE, l);
> __dec_zone_state(zone, NR_UNEVICTABLE);
> - list_move(&page->lru, &zone->lru[l].list);
> - mem_cgroup_move_lists(page, LRU_UNEVICTABLE, l);
> + list_move(&page->lru, &lruvec->lists[l]);
> __inc_zone_state(zone, NR_INACTIVE_ANON + l);
> __count_vm_event(UNEVICTABLE_PGRESCUED);
> } else {
> @@ -3171,8 +3161,9 @@ retry:
> * rotate unevictable list
> */
> SetPageUnevictable(page);
> - list_move(&page->lru, &zone->lru[LRU_UNEVICTABLE].list);
> - mem_cgroup_rotate_lru_list(page, LRU_UNEVICTABLE);
> + lruvec = mem_cgroup_lru_move_lists(zone, page, LRU_UNEVICTABLE,
> + LRU_UNEVICTABLE);
> + list_move(&page->lru, &lruvec->lists[LRU_UNEVICTABLE]);
> if (page_evictable(page, NULL))
> goto retry;
> }
> @@ -3233,14 +3224,6 @@ void scan_mapping_unevictable_pages(struct address_space *mapping)
>
> }
>
> -static struct page *lru_tailpage(struct zone *zone, struct mem_cgroup *mem,
> - enum lru_list lru)
> -{
> - if (mem)
> - return mem_cgroup_lru_to_page(zone, mem, lru);
> - return lru_to_page(&zone->lru[lru].list);
> -}
> -
> /**
> * scan_zone_unevictable_pages - check unevictable list for evictable pages
> * @zone - zone of which to scan the unevictable list
> @@ -3259,8 +3242,13 @@ static void scan_zone_unevictable_pages(struct zone *zone)
> first = mem = mem_cgroup_hierarchy_walk(NULL, mem);
> do {
> unsigned long nr_to_scan;
> + struct list_head *list;
> + struct lruvec *lruvec;
>
> nr_to_scan = zone_nr_lru_pages(zone, mem, LRU_UNEVICTABLE);
> + lruvec = mem_cgroup_zone_lruvec(zone, mem);
> + list = &lruvec->lists[LRU_UNEVICTABLE];
> +
> while (nr_to_scan > 0) {
> unsigned long batch_size;
> unsigned long scan;
> @@ -3272,7 +3260,7 @@ static void scan_zone_unevictable_pages(struct zone *zone)
> for (scan = 0; scan < batch_size; scan++) {
> struct page *page;
>
> - page = lru_tailpage(zone, mem, LRU_UNEVICTABLE);
> + page = lru_to_page(list);
> if (!trylock_page(page))
> continue;
> if (likely(PageLRU(page) &&
> --
> 1.7.5.2
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/