[RFC] [PATCH] cgroup: accounting and limitation of disk quota

From: anqin
Date: Sun Feb 22 2009 - 07:38:01 EST


The patch presents a cgroup subsystem to control the usage of disk quota.

The disk quota subsystem (disk_cgroup for short) accounts for the inodes
and blocks allocated by the ext2/ext3 filesystems. Similarly to
filesystem quota, disk_cgroup can enforce limits, but without needing to
enable the filesystem quota options (e.g. usrquota,grpquota in /etc/fstab).

The simple usage of disk_cgroup is as follows:

# mount -t cgroup cgroup /mnt/cgrp
# lxc-execute -n lxc-template.conf /bin/bash
# ls /mnt/cgrp/11457/ // <-- 11457 is the pid of bash
...
disk.stat
disk.usage_in_inode
disk.usage_in_block
disk.max_usage_in_inode
disk.max_usage_in_block
disk.limit_in_inode
disk.limit_in_block
...

# echo 3 > /mnt/cgrp/11457/disk.max_usage_in_inode

# touch /tmp/mytestfile1
# touch /tmp/mytestfile2
# touch /tmp/mytestfile3
# touch /tmp/mytestfile4
touch: cannot touch `/tmp/mytestfile4': Disk quota exceeded

disk_cgroup can easily be extended to manage more complex
filesystem objects.


Signed-off-by: An Qin <anqin.qin@xxxxxxxxx>

---
diff -uprN -X linux-2.6.28.5/Documentation/dontdiff
linux-2.6.28.5/fs/ext2/balloc.c
linux-2.6.28.5-cgroup-disk-quota/fs/ext2/balloc.c
--- linux-2.6.28.5/fs/ext2/balloc.c 2009-02-13 01:51:15.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/fs/ext2/balloc.c 2009-02-21
12:09:17.000000000 +0800
@@ -16,7 +16,7 @@
#include <linux/sched.h>
#include <linux/buffer_head.h>
#include <linux/capability.h>
-
+#include <linux/cgroup_disk.h>
/*
* balloc.c contains the blocks allocation and deallocation routines
*/
@@ -571,6 +571,8 @@ error_return:
brelse(bitmap_bh);
release_blocks(sb, freed);
DQUOT_FREE_BLOCK(inode, freed);
+ disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+ freed << inode->i_sb->s_blocksize_bits);
}

/**
@@ -1247,11 +1249,15 @@ ext2_fsblk_t ext2_new_blocks(struct inod
/*
* Check quota for allocation of this block.
*/
- if (DQUOT_ALLOC_BLOCK(inode, num)) {
+ /* Ask the cgroup first: a refusal must not leave a dquot charge behind. */
+ if (disk_cgroup_check_quota(DISK_CURRENT_BLOCK,
+ num << inode->i_sb->s_blocksize_bits) ||
+ DQUOT_ALLOC_BLOCK(inode, num)) {
*errp = -EDQUOT;
return 0;
}

+ disk_cgroup_acct_quota(DISK_CURRENT_BLOCK, 1, num << inode->i_sb->s_blocksize_bits);
sbi = EXT2_SB(sb);
es = EXT2_SB(sb)->s_es;
ext2_debug("goal=%lu.\n", goal);
@@ -1410,6 +1416,8 @@ allocated:
*errp = 0;
brelse(bitmap_bh);
DQUOT_FREE_BLOCK(inode, *count-num);
+ disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+ (*count-num) << inode->i_sb->s_blocksize_bits);
*count = num;
return ret_block;

@@ -1419,8 +1427,11 @@ out:
/*
* Undo the block allocation
*/
- if (!performed_allocation)
+ if (!performed_allocation) {
DQUOT_FREE_BLOCK(inode, *count);
+ disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+ (*count) << inode->i_sb->s_blocksize_bits);
+ }
brelse(bitmap_bh);
return 0;
}
diff -uprN -X linux-2.6.28.5/Documentation/dontdiff
linux-2.6.28.5/fs/ext2/ialloc.c
linux-2.6.28.5-cgroup-disk-quota/fs/ext2/ialloc.c
--- linux-2.6.28.5/fs/ext2/ialloc.c 2009-02-13 01:51:15.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/fs/ext2/ialloc.c 2009-02-19
06:50:51.000000000 +0800
@@ -17,6 +17,7 @@
#include <linux/backing-dev.h>
#include <linux/buffer_head.h>
#include <linux/random.h>
+#include <linux/cgroup_disk.h>
#include "ext2.h"
#include "xattr.h"
#include "acl.h"
@@ -123,6 +124,7 @@ void ext2_free_inode (struct inode * ino
ext2_xattr_delete_inode(inode);
DQUOT_FREE_INODE(inode);
DQUOT_DROP(inode);
+ disk_cgroup_acct_quota(DISK_CURRENT_INODE,0,1);
}

es = EXT2_SB(sb)->s_es;
@@ -587,11 +589,13 @@ got:
spin_unlock(&sbi->s_next_gen_lock);
insert_inode_hash(inode);

- if (DQUOT_ALLOC_INODE(inode)) {
+ /* Ask the cgroup first: a refusal must not leave a dquot charge behind. */
+ if (disk_cgroup_check_quota(DISK_CURRENT_INODE, 1) || DQUOT_ALLOC_INODE(inode)) {
err = -EDQUOT;
goto fail_drop;
}

+ disk_cgroup_acct_quota(DISK_CURRENT_INODE,1,1);
err = ext2_init_acl(inode, dir);
if (err)
goto fail_free_drop;
@@ -607,9 +611,10 @@ got:

fail_free_drop:
DQUOT_FREE_INODE(inode);
+ disk_cgroup_acct_quota(DISK_CURRENT_INODE,0,1);

fail_drop:
DQUOT_DROP(inode);
inode->i_flags |= S_NOQUOTA;
inode->i_nlink = 0;
iput(inode);
diff -uprN -X linux-2.6.28.5/Documentation/dontdiff
linux-2.6.28.5/fs/ext2/xattr.c
linux-2.6.28.5-cgroup-disk-quota/fs/ext2/xattr.c
--- linux-2.6.28.5/fs/ext2/xattr.c 2009-02-13 01:51:15.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/fs/ext2/xattr.c 2009-02-19
06:50:51.000000000 +0800
@@ -60,6 +60,7 @@
#include <linux/mbcache.h>
#include <linux/quotaops.h>
#include <linux/rwsem.h>
+#include <linux/cgroup_disk.h>
#include "ext2.h"
#include "xattr.h"
#include "acl.h"
@@ -640,12 +641,17 @@ ext2_xattr_set2(struct inode *inode, str
/* The old block is released after updating
the inode. */
ea_bdebug(new_bh, "reusing block");
-
error = -EDQUOT;
- if (DQUOT_ALLOC_BLOCK(inode, 1)) {
+ /* Ask the cgroup first: a refusal must not leave a dquot charge behind. */
+ if (disk_cgroup_check_quota(DISK_CURRENT_BLOCK,
+ 1 << inode->i_sb->s_blocksize_bits) ||
+ DQUOT_ALLOC_BLOCK(inode, 1)) {
unlock_buffer(new_bh);
goto cleanup;
}
+ disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,1,
+ 1 << inode->i_sb->s_blocksize_bits);
+
le32_add_cpu(&HDR(new_bh)->h_refcount, 1);
ea_bdebug(new_bh, "refcount now=%d",
le32_to_cpu(HDR(new_bh)->h_refcount));
@@ -698,8 +704,11 @@ ext2_xattr_set2(struct inode *inode, str
* written (only some dirty data were not) so we just proceed
* as if nothing happened and cleanup the unused block */
if (error && error != -ENOSPC) {
- if (new_bh && new_bh != old_bh)
+ if (new_bh && new_bh != old_bh) {
DQUOT_FREE_BLOCK(inode, 1);
+ disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+ 1 << inode->i_sb->s_blocksize_bits);
+ }
goto cleanup;
}
} else
@@ -732,6 +741,8 @@ ext2_xattr_set2(struct inode *inode, str
if (ce)
mb_cache_entry_release(ce);
DQUOT_FREE_BLOCK(inode, 1);
+ disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+ 1 << inode->i_sb->s_blocksize_bits);
mark_buffer_dirty(old_bh);
ea_bdebug(old_bh, "refcount now=%d",
le32_to_cpu(HDR(old_bh)->h_refcount));
diff -uprN -X linux-2.6.28.5/Documentation/dontdiff
linux-2.6.28.5/fs/ext3/balloc.c
linux-2.6.28.5-cgroup-disk-quota/fs/ext3/balloc.c
--- linux-2.6.28.5/fs/ext3/balloc.c 2009-02-13 01:51:15.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/fs/ext3/balloc.c 2009-02-21
12:27:44.000000000 +0800
@@ -20,6 +20,8 @@
#include <linux/quotaops.h>
#include <linux/buffer_head.h>

+#include <linux/cgroup_disk.h>
+
/*
* balloc.c contains the blocks allocation and deallocation routines
*/
@@ -675,8 +677,11 @@ void ext3_free_blocks(handle_t *handle,
return;
}
ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
- if (dquot_freed_blocks)
+ if (dquot_freed_blocks) {
DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
+ disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+ dquot_freed_blocks << inode->i_sb->s_blocksize_bits);
+ }
return;
}

@@ -1502,10 +1507,14 @@ ext3_fsblk_t ext3_new_blocks(handle_t *h
/*
* Check quota for allocation of this block.
*/
- if (DQUOT_ALLOC_BLOCK(inode, num)) {
+ /* Ask the cgroup first: a refusal must not leave a dquot charge behind. */
+ if (disk_cgroup_check_quota(DISK_CURRENT_BLOCK,
+ num << inode->i_sb->s_blocksize_bits) ||
+ DQUOT_ALLOC_BLOCK(inode, num)) {
*errp = -EDQUOT;
return 0;
}
+ disk_cgroup_acct_quota(DISK_CURRENT_BLOCK, 1, num << inode->i_sb->s_blocksize_bits);

sbi = EXT3_SB(sb);
es = EXT3_SB(sb)->s_es;
@@ -1715,6 +1724,8 @@ allocated:
*errp = 0;
brelse(bitmap_bh);
DQUOT_FREE_BLOCK(inode, *count-num);
+ disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+ (*count-num) << inode->i_sb->s_blocksize_bits);
*count = num;
return ret_block;

@@ -1728,8 +1739,11 @@ out:
/*
* Undo the block allocation
*/
- if (!performed_allocation)
+ if (!performed_allocation) {
DQUOT_FREE_BLOCK(inode, *count);
+ disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+ (*count) << inode->i_sb->s_blocksize_bits);
+ }
brelse(bitmap_bh);
return 0;
}
diff -uprN -X linux-2.6.28.5/Documentation/dontdiff
linux-2.6.28.5/fs/ext3/ialloc.c
linux-2.6.28.5-cgroup-disk-quota/fs/ext3/ialloc.c
--- linux-2.6.28.5/fs/ext3/ialloc.c 2009-02-13 01:51:15.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/fs/ext3/ialloc.c 2009-02-19
06:51:05.000000000 +0800
@@ -25,6 +25,7 @@
#include <linux/bitops.h>

#include <asm/byteorder.h>
+#include <linux/cgroup_disk.h>

#include "xattr.h"
#include "acl.h"
@@ -126,6 +127,7 @@ void ext3_free_inode (handle_t *handle,
DQUOT_INIT(inode);
ext3_xattr_delete_inode(handle, inode);
DQUOT_FREE_INODE(inode);
+ disk_cgroup_acct_quota(DISK_CURRENT_INODE,0,1);
DQUOT_DROP(inode);

is_directory = S_ISDIR(inode->i_mode);
@@ -590,11 +592,13 @@ got:
sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0;

ret = inode;
- if(DQUOT_ALLOC_INODE(inode)) {
+ /* Ask the cgroup first: a refusal must not leave a dquot charge behind. */
+ if (disk_cgroup_check_quota(DISK_CURRENT_INODE, 1) || DQUOT_ALLOC_INODE(inode)) {
err = -EDQUOT;
goto fail_drop;
}

+ disk_cgroup_acct_quota(DISK_CURRENT_INODE,1,1);
err = ext3_init_acl(handle, inode, dir);
if (err)
goto fail_free_drop;
@@ -622,6 +626,7 @@ really_out:

fail_free_drop:
DQUOT_FREE_INODE(inode);
+ disk_cgroup_acct_quota(DISK_CURRENT_INODE,0,1);

fail_drop:
DQUOT_DROP(inode);
diff -uprN -X linux-2.6.28.5/Documentation/dontdiff
linux-2.6.28.5/fs/ext3/xattr.c
linux-2.6.28.5-cgroup-disk-quota/fs/ext3/xattr.c
--- linux-2.6.28.5/fs/ext3/xattr.c 2009-02-13 01:51:15.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/fs/ext3/xattr.c 2009-02-19
06:51:06.000000000 +0800
@@ -58,6 +58,7 @@
#include <linux/mbcache.h>
#include <linux/quotaops.h>
#include <linux/rwsem.h>
+#include <linux/cgroup_disk.h>
#include "xattr.h"
#include "acl.h"

@@ -499,6 +500,8 @@ ext3_xattr_release_block(handle_t *handl
if (IS_SYNC(inode))
handle->h_sync = 1;
DQUOT_FREE_BLOCK(inode, 1);
+ disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+ 1 << inode->i_sb->s_blocksize_bits);
ea_bdebug(bh, "refcount now=%d; releasing",
le32_to_cpu(BHDR(bh)->h_refcount));
if (ce)
@@ -773,9 +776,16 @@ inserted:
else {
/* The old block is released after updating
the inode. */
error = -EDQUOT;
- if (DQUOT_ALLOC_BLOCK(inode, 1))
+ /* Ask the cgroup first: a refusal must not leave a dquot charge behind. */
+ if (disk_cgroup_check_quota(DISK_CURRENT_BLOCK,
+ 1 << inode->i_sb->s_blocksize_bits) ||
+ DQUOT_ALLOC_BLOCK(inode, 1))
goto cleanup;
+
+ disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,1,
+ 1 << inode->i_sb->s_blocksize_bits);
+
error = ext3_journal_get_write_access(handle,
new_bh);
if (error)
@@ -849,6 +859,9 @@ cleanup:

cleanup_dquot:
DQUOT_FREE_BLOCK(inode, 1);
+ disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+ 1 << inode->i_sb->s_blocksize_bits);
+
goto cleanup;

bad_block:
diff -uprN -X linux-2.6.28.5/Documentation/dontdiff
linux-2.6.28.5/include/linux/cgroup_disk.h
linux-2.6.28.5-cgroup-disk-quota/include/linux/cgroup_disk.h
--- linux-2.6.28.5/include/linux/cgroup_disk.h 1970-01-01
08:00:00.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/include/linux/cgroup_disk.h 2009-02-21
11:57:57.000000000 +0800
@@ -0,0 +1,42 @@
+#ifndef CGROUP_DISK_H
+#define CGROUP_DISK_H
+
+#include <linux/quota.h>
+
+/*
+ * Selectors used as cftype->private by the disk cgroup control files;
+ * the DISK_CURRENT_* values are also the dq_type of the hooks below.
+ */
+enum {
+	DISK_MAX_USAGE_BLOCK,
+	DISK_CURRENT_BLOCK,
+	DISK_LIMIT_BLOCK,
+
+	DISK_MAX_USAGE_INODE,
+	DISK_CURRENT_INODE,
+	DISK_LIMIT_INODE,
+
+	DISK_USAGE_STAT,
+};
+
+#ifdef CONFIG_CGROUP_DISK
+/* Add the counters in @pstat to the statistics of current's cgroup. */
+extern void disk_cgroup_acct_stat(struct dqstats *pstat);
+/* Charge (@inc != 0) or uncharge (@inc == 0) @number to current's cgroup. */
+extern void disk_cgroup_acct_quota(int dq_type, int inc,
+				   unsigned long long number);
+/* Return 0 if @number more fits under the hard limit, non-zero if not. */
+extern int disk_cgroup_check_quota(int dq_type, unsigned long long number);
+#else
+static inline void disk_cgroup_acct_stat(struct dqstats *pstat) { }
+static inline void disk_cgroup_acct_quota(int dq_type, int inc,
+					  unsigned long long number) { }
+/* Must return 0 ("within quota"): callers use the result in a condition. */
+static inline int disk_cgroup_check_quota(int dq_type,
+					  unsigned long long number)
+{
+	return 0;
+}
+#endif /* CONFIG_CGROUP_DISK */
+
+#endif
diff -uprN -X linux-2.6.28.5/Documentation/dontdiff
linux-2.6.28.5/include/linux/cgroup_subsys.h
linux-2.6.28.5-cgroup-disk-quota/include/linux/cgroup_subsys.h
--- linux-2.6.28.5/include/linux/cgroup_subsys.h 2009-02-13
01:51:15.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/include/linux/cgroup_subsys.h 2009-02-19
06:48:52.000000000 +0800
@@ -53,4 +53,8 @@ SUBSYS(devices)
SUBSYS(freezer)
#endif

+#ifdef CONFIG_CGROUP_DISK
+SUBSYS(disk_cgroup)
+#endif
+
/* */
diff -uprN -X linux-2.6.28.5/Documentation/dontdiff
linux-2.6.28.5/init/Kconfig
linux-2.6.28.5-cgroup-disk-quota/init/Kconfig
--- linux-2.6.28.5/init/Kconfig 2009-02-13 01:51:15.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/init/Kconfig 2009-02-19
06:50:43.000000000 +0800
@@ -313,6 +313,16 @@ config CGROUP_DEVICE
Provides a cgroup implementing whitelists for devices which
a process in the cgroup can mknod or open.

+config CGROUP_DISK
+ bool "Enable cgroup disk quota limiting (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && CGROUPS
+ help
+ Provides a cgroup subsystem that accounts for the inodes and
+ blocks allocated on ext2/ext3 by the tasks in a cgroup and
+ allows limits to be set on them.
+
+ Say N if unsure.
+
config CPUSETS
bool "Cpuset support"
depends on SMP && CGROUPS
diff -uprN -X linux-2.6.28.5/Documentation/dontdiff
linux-2.6.28.5/kernel/cgroup_disk.c
linux-2.6.28.5-cgroup-disk-quota/kernel/cgroup_disk.c
--- linux-2.6.28.5/kernel/cgroup_disk.c 1970-01-01 08:00:00.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/kernel/cgroup_disk.c 2009-02-21
11:48:17.000000000 +0800
@@ -0,0 +1,434 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * Written by An Qin <anqin.qin@xxxxxxxxx>
+ *
+ * Disk quota cgroup subsystem: per-cgroup accounting and hard limits for
+ * the inodes and blocks allocated by the tasks of a cgroup.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/cgroup.h>
+#include <linux/slab.h>
+#include <linux/gfp.h>
+#include <linux/err.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/jiffies.h>
+#include <linux/spinlock.h>
+#include <linux/rcupdate.h>
+#include <linux/quota.h>
+
+#include <linux/cgroup_disk.h>
+
+/* Counters accumulated from struct dqstats by disk_cgroup_acct_stat(). */
+struct disk_cgroup_stat {
+	int lookups;
+	int drops;
+	int reads;
+	int writes;
+	int cache_hits;
+	int allocated_dquots;
+	int free_dquots;
+	int syncs;
+};
+
+/*
+ * Limits and current usage of one cgroup.  A hard limit of 0 means "no
+ * limit".  Only the hard limits are enforced here; the soft limits are
+ * stored and reported, and the grace-time fields are not used yet.
+ */
+struct disk_cgroup_quota {
+	qsize_t dqb_bhardlimit;
+	qsize_t dqb_bsoftlimit;
+	qsize_t dqb_curspace;
+	qsize_t dqb_ihardlimit;
+	qsize_t dqb_isoftlimit;
+	qsize_t dqb_curinodes;
+	time_t dqb_btime;
+	time_t dqb_itime;
+};
+
+struct disk_cgroup {
+	struct cgroup_subsys_state css;
+	spinlock_t lock;		/* protects quota and stat */
+	struct disk_cgroup_quota quota;
+	struct disk_cgroup_stat stat;
+};
+
+static inline struct disk_cgroup *cgroup_to_disk_cgroup(struct cgroup *cont)
+{
+	return container_of(cgroup_subsys_state(cont, disk_cgroup_subsys_id),
+			    struct disk_cgroup, css);
+}
+
+static inline struct disk_cgroup *task_to_disk_cgroup(struct task_struct *task)
+{
+	return container_of(task_subsys_state(task, disk_cgroup_subsys_id),
+			    struct disk_cgroup, css);
+}
+
+struct cgroup_subsys disk_cgroup_subsys;
+
+/* Allocate the zeroed (no limits, nothing used) state of a new cgroup. */
+static struct cgroup_subsys_state *disk_cgroup_create(
+		struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	struct disk_cgroup *disk;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return ERR_PTR(-EPERM);
+
+	if (!cgroup_is_descendant(cont))
+		return ERR_PTR(-EPERM);
+
+	disk = kzalloc(sizeof(*disk), GFP_KERNEL);
+	if (unlikely(!disk))
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_init(&disk->lock);
+
+	return &disk->css;
+}
+
+static void disk_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	kfree(cgroup_to_disk_cgroup(cont));
+}
+
+/* "disk.stat": dump the statistics counters of @cont. */
+static ssize_t disk_cgroup_read_stat(struct cgroup *cont, struct cftype *cft,
+				     struct file *file, char __user *buf,
+				     size_t nbytes, loff_t *ppos)
+{
+	struct disk_cgroup_stat stat;
+	struct disk_cgroup *disk;
+	ssize_t count, ret;
+	char *page;
+
+	page = (char *)__get_free_page(GFP_TEMPORARY);
+	if (!page)
+		return -ENOMEM;
+
+	cgroup_lock();
+	if (cgroup_is_removed(cont)) {
+		cgroup_unlock();
+		ret = -ENODEV;
+		goto out;
+	}
+
+	/* Snapshot under the lock, format and copy out after dropping it. */
+	disk = cgroup_to_disk_cgroup(cont);
+	spin_lock_irq(&disk->lock);
+	stat = disk->stat;
+	spin_unlock_irq(&disk->lock);
+	cgroup_unlock();
+
+	count = scnprintf(page, PAGE_SIZE,
+			  " type: %s\n"
+			  " lookups: %d\n"
+			  " drops: %d\n"
+			  " reads: %d\n"
+			  " writes: %d\n"
+			  " cache_hits: %d\n"
+			  "allocated_dquots: %d\n"
+			  " free_dquots: %d\n"
+			  " syncs: %d\n",
+			  cft->name,
+			  stat.lookups, stat.drops, stat.reads,
+			  stat.writes, stat.cache_hits,
+			  stat.allocated_dquots, stat.free_dquots,
+			  stat.syncs);
+
+	ret = simple_read_from_buffer(buf, nbytes, ppos, page, count);
+
+out:
+	free_page((unsigned long)page);
+	return ret;
+}
+
+/*
+ * Read handler shared by the usage and limit files; cft->private selects
+ * the value.  qsize_t is cast for printing because %llu requires exactly
+ * unsigned long long.
+ */
+static ssize_t disk_cgroup_read_quota(struct cgroup *cont, struct cftype *cft,
+				      struct file *file, char __user *buf,
+				      size_t nbytes, loff_t *ppos)
+{
+	struct disk_cgroup_quota quota;
+	struct disk_cgroup *disk;
+	ssize_t count, ret = 0;
+	char *page;
+
+	page = (char *)__get_free_page(GFP_TEMPORARY);
+	if (!page)
+		return -ENOMEM;
+
+	cgroup_lock();
+	if (cgroup_is_removed(cont)) {
+		cgroup_unlock();
+		ret = -ENODEV;
+		goto out;
+	}
+
+	disk = cgroup_to_disk_cgroup(cont);
+	spin_lock_irq(&disk->lock);
+	quota = disk->quota;
+	spin_unlock_irq(&disk->lock);
+	cgroup_unlock();
+
+	switch (cft->private) {
+	case DISK_CURRENT_BLOCK:
+		count = scnprintf(page, PAGE_SIZE,
+				  "current usage of block: %llu\n",
+				  (unsigned long long)quota.dqb_curspace);
+		break;
+	case DISK_CURRENT_INODE:
+		count = scnprintf(page, PAGE_SIZE,
+				  "current usage of inode: %llu\n",
+				  (unsigned long long)quota.dqb_curinodes);
+		break;
+	case DISK_MAX_USAGE_BLOCK:
+		count = scnprintf(page, PAGE_SIZE, "%llu\n",
+				  (unsigned long long)quota.dqb_bhardlimit);
+		break;
+	case DISK_MAX_USAGE_INODE:
+		count = scnprintf(page, PAGE_SIZE, "%llu\n",
+				  (unsigned long long)quota.dqb_ihardlimit);
+		break;
+	case DISK_LIMIT_BLOCK:
+		count = scnprintf(page, PAGE_SIZE, "%llu\n",
+				  (unsigned long long)quota.dqb_bsoftlimit);
+		break;
+	case DISK_LIMIT_INODE:
+		count = scnprintf(page, PAGE_SIZE, "%llu\n",
+				  (unsigned long long)quota.dqb_isoftlimit);
+		break;
+	default:
+		/* Unknown selector: report an empty file (ret is 0). */
+		goto out;
+	}
+
+	ret = simple_read_from_buffer(buf, nbytes, ppos, page, count);
+
+out:
+	free_page((unsigned long)page);
+	return ret;
+}
+
+/* Write handler for the limit files; cft->private selects the limit. */
+static int disk_cgroup_write_u64(struct cgroup *cont, struct cftype *cft,
+				 u64 val)
+{
+	struct disk_cgroup *disk;
+	int ret = 0;
+
+	cgroup_lock();
+	if (cgroup_is_removed(cont)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	disk = cgroup_to_disk_cgroup(cont);
+
+	spin_lock_irq(&disk->lock);
+	switch (cft->private) {
+	case DISK_MAX_USAGE_BLOCK:
+		disk->quota.dqb_bhardlimit = val;
+		break;
+	case DISK_MAX_USAGE_INODE:
+		disk->quota.dqb_ihardlimit = val;
+		break;
+	case DISK_LIMIT_BLOCK:
+		disk->quota.dqb_bsoftlimit = val;
+		break;
+	case DISK_LIMIT_INODE:
+		disk->quota.dqb_isoftlimit = val;
+		break;
+	default:
+		break;
+	}
+	spin_unlock_irq(&disk->lock);
+
+out:
+	cgroup_unlock();
+	return ret;
+}
+
+static struct cftype disk_cgroup_files[] = {
+	{
+		.name = "stat",
+		.read = disk_cgroup_read_stat,
+		.private = DISK_USAGE_STAT,
+	},
+	{
+		.name = "usage_in_block",
+		.read = disk_cgroup_read_quota,
+		.private = DISK_CURRENT_BLOCK,
+	},
+	{
+		.name = "usage_in_inode",
+		.read = disk_cgroup_read_quota,
+		.private = DISK_CURRENT_INODE,
+	},
+	{
+		.name = "max_usage_in_block",
+		.read = disk_cgroup_read_quota,
+		.write_u64 = disk_cgroup_write_u64,
+		.private = DISK_MAX_USAGE_BLOCK,
+	},
+	{
+		.name = "max_usage_in_inode",
+		.read = disk_cgroup_read_quota,
+		.write_u64 = disk_cgroup_write_u64,
+		.private = DISK_MAX_USAGE_INODE,
+	},
+	{
+		.name = "limit_in_block",
+		.read = disk_cgroup_read_quota,
+		.write_u64 = disk_cgroup_write_u64,
+		.private = DISK_LIMIT_BLOCK,
+	},
+	{
+		.name = "limit_in_inode",
+		.read = disk_cgroup_read_quota,
+		.write_u64 = disk_cgroup_write_u64,
+		.private = DISK_LIMIT_INODE,
+	},
+};
+
+static int disk_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	return cgroup_add_files(cont, ss, disk_cgroup_files,
+				ARRAY_SIZE(disk_cgroup_files));
+}
+
+struct cgroup_subsys disk_cgroup_subsys = {
+	.name = "disk",
+	.create = disk_cgroup_create,
+	.destroy = disk_cgroup_destroy,
+	.populate = disk_cgroup_populate,
+	.subsys_id = disk_cgroup_subsys_id,
+};
+
+/*
+ * The hooks below run on filesystem allocation paths and touch the same
+ * fields as the control files, so they take disk->lock as well.  The
+ * RCU read section covers the task -> cgroup state lookup.
+ */
+
+/* Add the counters in @pstat to the statistics of current's cgroup. */
+void disk_cgroup_acct_stat(struct dqstats *pstat)
+{
+	struct disk_cgroup *disk;
+	unsigned long flags;
+
+	rcu_read_lock();
+	disk = task_to_disk_cgroup(current);
+	if (!disk)
+		goto out;
+
+	spin_lock_irqsave(&disk->lock, flags);
+	disk->stat.lookups += pstat->lookups;
+	disk->stat.drops += pstat->drops;
+	disk->stat.reads += pstat->reads;
+	disk->stat.writes += pstat->writes;
+	disk->stat.cache_hits += pstat->cache_hits;
+	disk->stat.allocated_dquots += pstat->allocated_dquots;
+	disk->stat.free_dquots += pstat->free_dquots;
+	disk->stat.syncs += pstat->syncs;
+	spin_unlock_irqrestore(&disk->lock, flags);
+out:
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(disk_cgroup_acct_stat);
+
+/* Charge or uncharge @number on @counter; uncharging saturates at zero. */
+static void disk_cgroup_adjust(qsize_t *counter, int inc,
+			       unsigned long long number)
+{
+	if (inc)
+		*counter += number;
+	else if (*counter > number)
+		*counter -= number;
+	else
+		*counter = 0;
+}
+
+/*
+ * Account @number bytes (DISK_CURRENT_BLOCK) or inodes (DISK_CURRENT_INODE)
+ * to the cgroup of current: added if @inc is non-zero, subtracted
+ * otherwise.  Other @dq_type values are ignored.
+ */
+void disk_cgroup_acct_quota(int dq_type, int inc, unsigned long long number)
+{
+	struct disk_cgroup *disk;
+	unsigned long flags;
+
+	rcu_read_lock();
+	disk = task_to_disk_cgroup(current);
+	if (!disk)
+		goto out;
+
+	spin_lock_irqsave(&disk->lock, flags);
+	if (dq_type == DISK_CURRENT_BLOCK)
+		disk_cgroup_adjust(&disk->quota.dqb_curspace, inc, number);
+	else if (dq_type == DISK_CURRENT_INODE)
+		disk_cgroup_adjust(&disk->quota.dqb_curinodes, inc, number);
+	spin_unlock_irqrestore(&disk->lock, flags);
+out:
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(disk_cgroup_acct_quota);
+
+/* Non-zero if charging @number on top of @cur would exceed @limit. */
+static int disk_cgroup_would_exceed(unsigned long long limit,
+				    unsigned long long cur,
+				    unsigned long long number)
+{
+	if (!limit)		/* 0 means "no limit" */
+		return 0;
+	if (cur > limit)	/* usage is already above the limit */
+		return 1;
+	return number > limit - cur;	/* cannot wrap, unlike cur + number */
+}
+
+/*
+ * Check whether the cgroup of current may allocate @number more bytes
+ * (DISK_CURRENT_BLOCK) or inodes (DISK_CURRENT_INODE).  Returns 0 if so and
+ * -1 if the hard limit would be exceeded.  Nothing is charged here: the
+ * caller charges with disk_cgroup_acct_quota() afterwards, so concurrent
+ * allocations can still overshoot the limit.
+ */
+int disk_cgroup_check_quota(int dq_type, unsigned long long number)
+{
+	struct disk_cgroup *disk;
+	unsigned long flags;
+	int ret = 0;
+
+	rcu_read_lock();
+	disk = task_to_disk_cgroup(current);
+	if (!disk)
+		goto out;
+
+	spin_lock_irqsave(&disk->lock, flags);
+	if (dq_type == DISK_CURRENT_BLOCK &&
+	    disk_cgroup_would_exceed(disk->quota.dqb_bhardlimit,
+				     disk->quota.dqb_curspace, number))
+		ret = -1;
+	else if (dq_type == DISK_CURRENT_INODE &&
+		 disk_cgroup_would_exceed(disk->quota.dqb_ihardlimit,
+					  disk->quota.dqb_curinodes, number))
+		ret = -1;
+	spin_unlock_irqrestore(&disk->lock, flags);
+out:
+	rcu_read_unlock();
+	return ret;
+}
+EXPORT_SYMBOL(disk_cgroup_check_quota);
diff -uprN -X linux-2.6.28.5/Documentation/dontdiff
linux-2.6.28.5/kernel/Makefile
linux-2.6.28.5-cgroup-disk-quota/kernel/Makefile
--- linux-2.6.28.5/kernel/Makefile 2009-02-13 01:51:15.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/kernel/Makefile 2009-02-19
06:52:04.000000000 +0800
@@ -55,6 +55,7 @@ obj-$(CONFIG_COMPAT) += compat.o
obj-$(CONFIG_CGROUPS) += cgroup.o
obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o
+obj-$(CONFIG_CGROUP_DISK) += cgroup_disk.o
obj-$(CONFIG_CPUSETS) += cpuset.o
obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
obj-$(CONFIG_UTS_NS) += utsname.o
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/