[PATCH 1/7] porting bio-cgroup to 2.6.28-rc2-mm1

From: Gui Jianfeng
Date: Thu Nov 20 2008 - 06:11:15 EST


From: Ryo Tsuruta <ryov@xxxxxxxxxxxxx>

porting bio-cgroup to 2.6.28-rc2-mm1

Signed-off-by: Ryo Tsuruta <ryov@xxxxxxxxxxxxx>
Signed-off-by: Hirokazu Takahashi <taka@xxxxxxxxxxxxx>
---
block/blk-ioc.c | 30 +++--
fs/buffer.c | 2 +
fs/direct-io.c | 2 +
include/linux/biotrack.h | 82 ++++++++++++
include/linux/cgroup_subsys.h | 6 +
include/linux/iocontext.h | 1 +
include/linux/memcontrol.h | 14 ++-
include/linux/mmzone.h | 4 +-
include/linux/page_cgroup.h | 11 ++-
init/Kconfig | 15 +++
mm/Makefile | 4 +-
mm/biotrack.c | 274 +++++++++++++++++++++++++++++++++++++++++
mm/bounce.c | 2 +
mm/filemap.c | 2 +
mm/memcontrol.c | 5 +
mm/memory.c | 5 +
mm/page-writeback.c | 2 +
mm/page_cgroup.c | 15 ++-
mm/swap_state.c | 2 +
19 files changed, 452 insertions(+), 26 deletions(-)
create mode 100644 include/linux/biotrack.h
create mode 100644 mm/biotrack.c

diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 012f065..ef8cac0 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -84,24 +84,28 @@ void exit_io_context(void)
}
}

+void init_io_context(struct io_context *ioc)
+{
+	spin_lock_init(&ioc->lock);
+	atomic_set(&ioc->refcount, 1);
+	atomic_set(&ioc->nr_tasks, 1);
+	ioc->ioprio = 0;
+	ioc->ioprio_changed = 0;
+	ioc->nr_batch_requests = 0;
+	ioc->last_waited = jiffies; /* arbitrary: nr_batch_requests is 0 */
+	ioc->aic = NULL;
+	ioc->ioc_data = NULL;
+	INIT_HLIST_HEAD(&ioc->cic_list);
+	INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH);
+}
+
struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
{
struct io_context *ret;

ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
- if (ret) {
- atomic_set(&ret->refcount, 1);
- atomic_set(&ret->nr_tasks, 1);
- spin_lock_init(&ret->lock);
- ret->ioprio_changed = 0;
- ret->ioprio = 0;
- ret->last_waited = jiffies; /* doesn't matter... */
- ret->nr_batch_requests = 0; /* because this is 0 */
- ret->aic = NULL;
- INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH);
- INIT_HLIST_HEAD(&ret->cic_list);
- ret->ioc_data = NULL;
- }
+ if (ret)
+ init_io_context(ret);

return ret;
}
diff --git a/fs/buffer.c b/fs/buffer.c
index f624fc7..0edbac0 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -36,6 +36,7 @@
#include <linux/buffer_head.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/bio.h>
+#include <linux/biotrack.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/bitops.h>
@@ -779,6 +780,7 @@ static int __set_page_dirty(struct page *page,
BDI_RECLAIMABLE);
task_io_account_write(PAGE_CACHE_SIZE);
}
+ bio_cgroup_reset_owner_pagedirty(page, current->mm);
radix_tree_tag_set(&mapping->page_tree,
page_index(page), PAGECACHE_TAG_DIRTY);
}
diff --git a/fs/direct-io.c b/fs/direct-io.c
index af0558d..222a970 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -33,6 +33,7 @@
#include <linux/err.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
+#include <linux/biotrack.h>
#include <linux/rwsem.h>
#include <linux/uio.h>
#include <asm/atomic.h>
@@ -799,6 +800,7 @@ static int do_direct_IO(struct dio *dio)
ret = PTR_ERR(page);
goto out;
}
+ bio_cgroup_reset_owner(page, current->mm);

while (block_in_page < blocks_per_page) {
unsigned offset_in_page = block_in_page << blkbits;
diff --git a/include/linux/biotrack.h b/include/linux/biotrack.h
new file mode 100644
index 0000000..d352abd
--- /dev/null
+++ b/include/linux/biotrack.h
@@ -0,0 +1,82 @@
+#ifndef _LINUX_BIOTRACK_H
+#define _LINUX_BIOTRACK_H
+
+#include <linux/cgroup.h>
+#include <linux/mm.h>
+#include <linux/page_cgroup.h>
+
+struct bio;
+struct io_context;
+struct block_device;
+#ifdef CONFIG_CGROUP_BIO
+
+struct bio_cgroup {
+ struct cgroup_subsys_state css;
+ int id; /* 0 is the default bio_cgroup id */
+ struct io_context *io_context; /* default io_context */
+/* struct radix_tree_root io_context_root; per device io_context */
+};
+
+static inline void __init_bio_page_cgroup(struct page_cgroup *pc)
+{
+ pc->bio_cgroup_id = 0;
+}
+
+static inline int bio_cgroup_disabled(void)
+{
+ return bio_cgroup_subsys.disabled;
+}
+
+extern void bio_cgroup_set_owner(struct page *page, struct mm_struct *mm);
+extern void bio_cgroup_reset_owner(struct page *page, struct mm_struct *mm);
+extern void bio_cgroup_reset_owner_pagedirty(struct page *page,
+ struct mm_struct *mm);
+extern void bio_cgroup_copy_owner(struct page *page, struct page *opage);
+
+extern struct io_context *get_bio_cgroup_iocontext(struct bio *bio);
+extern int get_bio_cgroup_id(struct bio *bio);
+
+#else /* CONFIG_CGROUP_BIO */
+
+struct bio_cgroup;
+
+static inline void __init_bio_page_cgroup(struct page_cgroup *pc)
+{
+}
+
+static inline int bio_cgroup_disabled(void)
+{
+ return 1;
+}
+
+static inline void bio_cgroup_set_owner(struct page *page, struct mm_struct *mm)
+{
+}
+
+static inline void bio_cgroup_reset_owner(struct page *page,
+ struct mm_struct *mm)
+{
+}
+
+static inline void bio_cgroup_reset_owner_pagedirty(struct page *page,
+ struct mm_struct *mm)
+{
+}
+
+static inline void bio_cgroup_copy_owner(struct page *page, struct page *opage)
+{
+}
+
+static inline struct io_context *get_bio_cgroup_iocontext(struct bio *bio)
+{
+ return NULL;
+}
+
+static inline int get_bio_cgroup_id(struct bio *bio)
+{
+ return 0;
+}
+
+#endif /* CONFIG_CGROUP_BIO */
+
+#endif /* _LINUX_BIOTRACK_H */
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 9c22396..8eb6f48 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -43,6 +43,12 @@ SUBSYS(mem_cgroup)

/* */

+#ifdef CONFIG_CGROUP_BIO
+SUBSYS(bio_cgroup)
+#endif
+
+/* */
+
#ifdef CONFIG_CGROUP_DEVICE
SUBSYS(devices)
#endif
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 08b987b..be37c27 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -104,6 +104,7 @@ int put_io_context(struct io_context *ioc);
void exit_io_context(void);
struct io_context *get_io_context(gfp_t gfp_flags, int node);
struct io_context *alloc_io_context(gfp_t gfp_flags, int node);
+void init_io_context(struct io_context *ioc);
void copy_io_context(struct io_context **pdst, struct io_context **psrc);
#else
static inline void exit_io_context(void)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 1fbe14d..f519a88 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -20,12 +20,14 @@
#ifndef _LINUX_MEMCONTROL_H
#define _LINUX_MEMCONTROL_H

-struct mem_cgroup;
+struct mem_cgroup;
struct page_cgroup;
struct page;
struct mm_struct;

#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+extern void __init_mem_page_cgroup(struct page_cgroup *pc);
+#define mem_cgroup_disabled() mem_cgroup_subsys.disabled

extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask);
@@ -71,6 +73,16 @@ extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,


#else /* CONFIG_CGROUP_MEM_RES_CTLR */
+static inline void __init_mem_page_cgroup(struct page_cgroup *pc)
+{
+}
+
+static inline int mem_cgroup_disabled(void)
+{
+ return 1;
+}
+
+
static inline int mem_cgroup_charge(struct page *page,
struct mm_struct *mm, gfp_t gfp_mask)
{
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 35a7b5e..bf7b6e2 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -603,7 +603,7 @@ typedef struct pglist_data {
int nr_zones;
#ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */
struct page *node_mem_map;
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_CGROUP_PAGE
struct page_cgroup *node_page_cgroup;
#endif
#endif
@@ -952,7 +952,7 @@ struct mem_section {

/* See declaration of similar field in struct zone */
unsigned long *pageblock_flags;
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_CGROUP_PAGE
/*
* If !SPARSEMEM, pgdat doesn't have page_cgroup pointer. We use
* section. (see memcontrol.h/page_cgroup.h about this.)
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index f546ad6..07aba8b 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -1,7 +1,7 @@
#ifndef __LINUX_PAGE_CGROUP_H
#define __LINUX_PAGE_CGROUP_H

-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_CGROUP_PAGE
#include <linux/bit_spinlock.h>
/*
* Page Cgroup can be considered as an extended mem_map.
@@ -12,9 +12,14 @@
*/
struct page_cgroup {
unsigned long flags;
- struct mem_cgroup *mem_cgroup;
struct page *page;
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+ struct mem_cgroup *mem_cgroup;
struct list_head lru; /* per cgroup LRU list */
+#endif
+#ifdef CONFIG_CGROUP_BIO
+ int bio_cgroup_id;
+#endif
};

void __init pgdat_page_cgroup_init(struct pglist_data *pgdat);
@@ -88,7 +93,7 @@ static inline void unlock_page_cgroup(struct page_cgroup *pc)
bit_spin_unlock(PCG_LOCK, &pc->flags);
}

-#else /* CONFIG_CGROUP_MEM_RES_CTLR */
+#else /* CONFIG_CGROUP_PAGE */
struct page_cgroup;

static inline void pgdat_page_cgroup_init(struct pglist_data *pgdat)
diff --git a/init/Kconfig b/init/Kconfig
index 3c9d79b..6394a25 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -393,6 +393,21 @@ config RESOURCE_COUNTERS
infrastructure that works with cgroups
depends on CGROUPS

+config CGROUP_BIO
+ bool "Block I/O cgroup subsystem"
+ depends on CGROUPS && BLOCK
+ select MM_OWNER
+ help
+ Provides a resource controller that tracks the owner of every
+ block I/O request.
+ The information this subsystem provides can be used by any
+ kind of module, such as the dm-ioband device-mapper module or
+ the cfq-scheduler.
+
+config CGROUP_PAGE
+ def_bool y
+ depends on CGROUP_MEM_RES_CTLR || CGROUP_BIO
+
config MM_OWNER
bool

diff --git a/mm/Makefile b/mm/Makefile
index f35fcc3..5f3ba89 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -34,5 +34,7 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o
obj-$(CONFIG_MIGRATION) += migrate.o
obj-$(CONFIG_SMP) += allocpercpu.o
obj-$(CONFIG_QUICKLIST) += quicklist.o
-obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
+obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o
+obj-$(CONFIG_CGROUP_PAGE) += page_cgroup.o
+obj-$(CONFIG_CGROUP_BIO) += biotrack.o
obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
diff --git a/mm/biotrack.c b/mm/biotrack.c
new file mode 100644
index 0000000..1af5910
--- /dev/null
+++ b/mm/biotrack.c
@@ -0,0 +1,274 @@
+/* biotrack.c - Block I/O Tracking
+ *
+ * Copyright (C) VA Linux Systems Japan, 2008
+ * Developed by Hirokazu Takahashi <taka@xxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/bit_spinlock.h>
+#include <linux/idr.h>
+#include <linux/blkdev.h>
+#include <linux/biotrack.h>
+
+/*
+ * The block I/O tracking mechanism is implemented on the cgroup memory
+ * controller framework. It helps to find the owner of an I/O request
+ * because every I/O request has a target page and the owner of the page
+ * can be easily determined on the framework.
+ */
+
+/* Return the bio_cgroup associated with a cgroup. */
+static inline struct bio_cgroup *cgroup_bio(struct cgroup *cgrp)
+{
+ return container_of(cgroup_subsys_state(cgrp, bio_cgroup_subsys_id),
+ struct bio_cgroup, css);
+}
+
+/* Return the bio_cgroup associated with a process. */
+static inline struct bio_cgroup *bio_cgroup_from_task(struct task_struct *p)
+{
+ return container_of(task_subsys_state(p, bio_cgroup_subsys_id),
+ struct bio_cgroup, css);
+}
+
+static struct idr bio_cgroup_id;
+static DEFINE_SPINLOCK(bio_cgroup_idr_lock);
+static struct io_context default_bio_io_context;
+static struct bio_cgroup default_bio_cgroup = {
+ .id = 0,
+ .io_context = &default_bio_io_context,
+};
+
+/*
+ * Record in the page_cgroup of "page" the bio-cgroup id of the task owning
+ * "mm"; the id stays 0 (default) if mm or its owner is NULL.
+ */
+void bio_cgroup_set_owner(struct page *page, struct mm_struct *mm)
+{
+	struct task_struct *owner;
+	struct page_cgroup *pc;
+
+	if (bio_cgroup_disabled())
+		return;
+	pc = lookup_page_cgroup(page);
+	if (unlikely(!pc))
+		return;
+
+	pc->bio_cgroup_id = 0;	/* 0: default bio_cgroup id */
+	if (!mm)
+		return;
+	/*
+	 * Locking "pc" isn't necessary here since the current process is
+	 * the only one that can access the members related to bio_cgroup.
+	 */
+	rcu_read_lock();
+	owner = rcu_dereference(mm->owner);
+	if (unlikely(!owner))	/* the mm may have no owner task left */
+		goto out;
+	/*
+	 * css_get(&biog->css) isn't called to increment the reference
+	 * count of this bio_cgroup so pc->bio_cgroup_id might turn
+	 * invalid even if this page is still active.
+	 * This approach is chosen to minimize the overhead.
+	 */
+	pc->bio_cgroup_id = bio_cgroup_from_task(owner)->id;
+out:
+	rcu_read_unlock();
+}
+
+/*
+ * Change the owner of a given page if necessary.
+ */
+void bio_cgroup_reset_owner(struct page *page, struct mm_struct *mm)
+{
+ /*
+ * A little trick:
+ * Just call bio_cgroup_set_owner() for pages which are already
+ * active since the bio_cgroup_id member of page_cgroup can be
+ * updated without any locks. This is because an integer type of
+ * variable can be set a new value at once on modern cpus.
+ */
+ bio_cgroup_set_owner(page, mm);
+}
+
+/*
+ * Change the owner of a given page. This function is only effective for
+ * pages in the pagecache.
+ */
+void bio_cgroup_reset_owner_pagedirty(struct page *page, struct mm_struct *mm)
+{
+ if (PageSwapCache(page) || PageAnon(page))
+ return;
+ if (current->flags & PF_MEMALLOC)
+ return;
+
+ bio_cgroup_reset_owner(page, mm);
+}
+
+/*
+ * Assign "npage" the same owner as "opage".
+ */
+void bio_cgroup_copy_owner(struct page *npage, struct page *opage)
+{
+ struct page_cgroup *npc, *opc;
+
+ if (bio_cgroup_disabled())
+ return;
+ npc = lookup_page_cgroup(npage);
+ if (unlikely(!npc))
+ return;
+ opc = lookup_page_cgroup(opage);
+ if (unlikely(!opc))
+ return;
+
+ /*
+ * Do this without any locks. The reason is the same as
+ * bio_cgroup_reset_owner().
+ */
+ npc->bio_cgroup_id = opc->bio_cgroup_id;
+}
+
+/* Create a bio-cgroup; the root cgroup uses the static default_bio_cgroup. */
+static struct cgroup_subsys_state *
+bio_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+	struct bio_cgroup *biog;
+	struct io_context *ioc;
+	int ret;
+
+	if (!cgrp->parent) {
+		biog = &default_bio_cgroup;
+		init_io_context(biog->io_context);
+		/* Increment the reference count not to be released ever. */
+		atomic_inc(&biog->io_context->refcount);
+		idr_init(&bio_cgroup_id);
+		return &biog->css;
+	}
+
+	biog = kzalloc(sizeof(*biog), GFP_KERNEL);
+	ioc = alloc_io_context(GFP_KERNEL, -1);
+	if (!ioc || !biog) {
+		ret = -ENOMEM;
+		goto out_err;
+	}
+	biog->io_context = ioc;
+retry:
+	if (!idr_pre_get(&bio_cgroup_id, GFP_KERNEL)) {
+		ret = -ENOMEM;	/* idr_pre_get() fails only on OOM */
+		goto out_err;
+	}
+	spin_lock_irq(&bio_cgroup_idr_lock);
+	ret = idr_get_new_above(&bio_cgroup_id, biog, 1, &biog->id);
+	spin_unlock_irq(&bio_cgroup_idr_lock);
+	if (ret == -EAGAIN)
+		goto retry;
+	else if (ret)
+		goto out_err;
+
+	return &biog->css;
+out_err:
+	/* kfree() accepts NULL, so biog needs no check of its own. */
+	kfree(biog);
+	if (ioc)
+		put_io_context(ioc);
+	return ERR_PTR(ret);
+}
+
+/* Delete the bio-cgroup. */
+static void bio_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+	struct bio_cgroup *biog = cgroup_bio(cgrp);
+
+	/* Unpublish the id before teardown so new lookups cannot find biog. */
+	spin_lock_irq(&bio_cgroup_idr_lock);
+	idr_remove(&bio_cgroup_id, biog->id);
+	spin_unlock_irq(&bio_cgroup_idr_lock);
+
+	put_io_context(biog->io_context);
+	kfree(biog);
+}
+
+static struct bio_cgroup *find_bio_cgroup(int id)
+{
+	struct bio_cgroup *found;
+	/*
+	 * The lookup may legitimately come back empty: a bio-cgroup can be
+	 * removed while I/O requests it issued haven't completed yet.
+	 */
+	spin_lock_irq(&bio_cgroup_idr_lock);
+	found = idr_find(&bio_cgroup_id, id);
+	spin_unlock_irq(&bio_cgroup_idr_lock);
+
+	return found;
+}
+
+/* Determine the bio-cgroup id of a given bio (0 if it cannot be found). */
+int get_bio_cgroup_id(struct bio *bio)
+{
+	struct page_cgroup *pc;
+
+	/* A bio without an io_vec carries no page to look the owner up from. */
+	if (!bio->bi_io_vec)
+		return 0;
+
+	pc = lookup_page_cgroup(bio_iovec_idx(bio, 0)->bv_page);
+	return pc ? pc->bio_cgroup_id : 0;
+}
+
+/* Determine the iocontext of the bio-cgroup that issued a given bio. */
+struct io_context *get_bio_cgroup_iocontext(struct bio *bio)
+{
+	struct bio_cgroup *owner = NULL;
+	struct io_context *ctx;
+	int id = get_bio_cgroup_id(bio);
+
+	/* Id 0, or an id that is no longer registered, maps to the default. */
+	if (id)
+		owner = find_bio_cgroup(id);
+	if (!owner)
+		owner = &default_bio_cgroup;
+	ctx = owner->io_context;	/* default io_context for this cgroup */
+	atomic_inc(&ctx->refcount);	/* reference taken for the caller */
+	return ctx;
+}
+EXPORT_SYMBOL(get_bio_cgroup_iocontext);
+EXPORT_SYMBOL(get_bio_cgroup_id);
+
+static u64 bio_id_read(struct cgroup *cgrp, struct cftype *cft)
+{
+	/* Report the id of the bio_cgroup attached to this cgroup. */
+	return (u64) cgroup_bio(cgrp)->id;
+}
+
+
+static struct cftype bio_files[] = {
+ {
+ .name = "id",
+ .read_u64 = bio_id_read,
+ },
+};
+
+static int bio_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+ return cgroup_add_files(cgrp, ss, bio_files, ARRAY_SIZE(bio_files));
+}
+
+struct cgroup_subsys bio_cgroup_subsys = {
+ .name = "bio",
+ .create = bio_cgroup_create,
+ .destroy = bio_cgroup_destroy,
+ .populate = bio_cgroup_populate,
+ .subsys_id = bio_cgroup_subsys_id,
+};
+
diff --git a/mm/bounce.c b/mm/bounce.c
index 06722c4..02096a6 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -14,6 +14,7 @@
#include <linux/hash.h>
#include <linux/highmem.h>
#include <linux/blktrace_api.h>
+#include <linux/biotrack.h>
#include <asm/tlbflush.h>

#define POOL_SIZE 64
@@ -204,6 +205,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
to->bv_len = from->bv_len;
to->bv_offset = from->bv_offset;
inc_zone_page_state(to->bv_page, NR_BOUNCE);
+ bio_cgroup_copy_owner(to->bv_page, page);

if (rw == WRITE) {
char *vto, *vfrom;
diff --git a/mm/filemap.c b/mm/filemap.c
index 721eace..fe58262 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -33,6 +33,7 @@
#include <linux/cpuset.h>
#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
#include <linux/memcontrol.h>
+#include <linux/biotrack.h>
#include <linux/mm_inline.h> /* for page_is_file_cache() */
#include "internal.h"

@@ -464,6 +465,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
gfp_mask & ~__GFP_HIGHMEM);
if (error)
goto out;
+ bio_cgroup_set_owner(page, current->mm);

error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
if (error == 0) {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 866dcc7..95048fe 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -157,6 +157,11 @@ pcg_default_flags[NR_CHARGE_TYPE] = {
0, /* FORCE */
};

+void __meminit __init_mem_page_cgroup(struct page_cgroup *pc)
+{
+ pc->mem_cgroup = NULL;
+}
+
/*
* Always modified under lru lock. Then, not necessary to preempt_disable()
*/
diff --git a/mm/memory.c b/mm/memory.c
index fd7d89b..4447ebe 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -51,6 +51,7 @@
#include <linux/init.h>
#include <linux/writeback.h>
#include <linux/memcontrol.h>
+#include <linux/biotrack.h>
#include <linux/mmu_notifier.h>

#include <asm/pgalloc.h>
@@ -1915,6 +1916,7 @@ gotten:
*/
ptep_clear_flush_notify(vma, address, page_table);
SetPageSwapBacked(new_page);
+ bio_cgroup_set_owner(new_page, mm);
lru_cache_add_active_or_unevictable(new_page, vma);
page_add_new_anon_rmap(new_page, vma, address);

@@ -2353,6 +2355,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
flush_icache_page(vma, page);
set_pte_at(mm, address, page_table, pte);
page_add_anon_rmap(page, vma, address);
+ bio_cgroup_reset_owner(page, mm);

swap_free(entry);
if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
@@ -2414,6 +2417,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
goto release;
inc_mm_counter(mm, anon_rss);
SetPageSwapBacked(page);
+ bio_cgroup_set_owner(page, mm);
lru_cache_add_active_or_unevictable(page, vma);
page_add_new_anon_rmap(page, vma, address);
set_pte_at(mm, address, page_table, entry);
@@ -2563,6 +2567,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (anon) {
inc_mm_counter(mm, anon_rss);
SetPageSwapBacked(page);
+ bio_cgroup_set_owner(page, mm);
lru_cache_add_active_or_unevictable(page, vma);
page_add_new_anon_rmap(page, vma, address);
} else {
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index b3584bf..f24daaa 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -23,6 +23,7 @@
#include <linux/init.h>
#include <linux/backing-dev.h>
#include <linux/task_io_accounting_ops.h>
+#include <linux/biotrack.h>
#include <linux/blkdev.h>
#include <linux/mpage.h>
#include <linux/rmap.h>
@@ -1100,6 +1101,7 @@ int __set_page_dirty_nobuffers(struct page *page)
BDI_RECLAIMABLE);
task_io_account_write(PAGE_CACHE_SIZE);
}
+ bio_cgroup_reset_owner_pagedirty(page, current->mm);
radix_tree_tag_set(&mapping->page_tree,
page_index(page), PAGECACHE_TAG_DIRTY);
}
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index f59d797..e6a882a 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -8,13 +8,16 @@
#include <linux/memory.h>
#include <linux/vmalloc.h>
#include <linux/cgroup.h>
+#include <linux/memcontrol.h>
+#include <linux/biotrack.h>

static void __meminit
__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
{
pc->flags = 0;
- pc->mem_cgroup = NULL;
pc->page = pfn_to_page(pfn);
+ __init_mem_page_cgroup(pc);
+ __init_bio_page_cgroup(pc);
}
static unsigned long total_usage;

@@ -69,7 +72,7 @@ void __init page_cgroup_init(void)

int nid, fail;

- if (mem_cgroup_subsys.disabled)
+ if (mem_cgroup_disabled() && bio_cgroup_disabled())
return;

for_each_online_node(nid) {
@@ -78,12 +81,12 @@ void __init page_cgroup_init(void)
goto fail;
}
printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
- printk(KERN_INFO "please try cgroup_disable=memory option if you"
+ printk(KERN_INFO "please try cgroup_disable=memory,bio option if you"
" don't want\n");
return;
fail:
printk(KERN_CRIT "allocation of page_cgroup was failed.\n");
- printk(KERN_CRIT "please try cgroup_disable=memory boot option\n");
+ printk(KERN_CRIT "please try cgroup_disable=memory,bio boot options\n");
panic("Out of memory");
}

@@ -229,7 +232,7 @@ void __init page_cgroup_init(void)
unsigned long pfn;
int fail = 0;

- if (mem_cgroup_subsys.disabled)
+ if (mem_cgroup_disabled() && bio_cgroup_disabled())
return;

for (pfn = 0; !fail && pfn < max_pfn; pfn += PAGES_PER_SECTION) {
@@ -244,7 +247,7 @@ void __init page_cgroup_init(void)
hotplug_memory_notifier(page_cgroup_callback, 0);
}
printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
- printk(KERN_INFO "please try cgroup_disable=memory option if you don't"
+ printk(KERN_INFO "please try cgroup_disable=memory,bio option if you don't"
" want\n");
}

diff --git a/mm/swap_state.c b/mm/swap_state.c
index 3353c90..42a5b45 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -17,6 +17,7 @@
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/migrate.h>
+#include <linux/biotrack.h>

#include <asm/pgtable.h>

@@ -305,6 +306,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
*/
__set_page_locked(new_page);
SetPageSwapBacked(new_page);
+ bio_cgroup_set_owner(new_page, current->mm);
err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL);
if (likely(!err)) {
/*
-- 1.5.4.rc3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/