All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] cgroup for disk quota
@ 2009-02-20  8:28 anqin
       [not found] ` <d95d44a20902200028h1e229cc0pa3cdd4f42814e78e-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: anqin @ 2009-02-20  8:28 UTC (permalink / raw)
  To: Daniel Lezcano, Serge E. Hallyn,
	containers-qjLDD68F18O7TbgM5vRIOg

[-- Attachment #1: Type: text/plain, Size: 1527 bytes --]

Dear Daniel and Serge,

For unified management of resources (CPU, memory, disk, network),
I (and Ian) developed a cgroup subsystem to control the usage
of disk quota.

The subsystem for disk quota (disk_cgroup, to be brief) does accounting
of inode and block allocated by ext3/ext2 filesystem. Simarily as
filesystem quota, the disk_cgroup can do limitation but without needing
to open filesytem quota options (e.g. usrquota,grpquota in /etc/fstab).
Since this patch is first developed, it needs more feedback and testing
from other developers or users.

The simple usage of disk_cgroup is as follows:

# mount -t cgroup cgroup /mnt/cgrp
# lxc-execute -n lxc-template.conf /bin/bash
# ls /mnt/cgrp/11457/		// <--  11457 is the pid of bash
...
disk.stat
disk.usage_in_inode
disk.usage_in_block
disk.max_usage_in_inode
disk.max_usage_in_block
disk.limit_in_inode
disk.limit_in_inode
...

# echo  3 > /mnt/cgrp/11457/disk.max_usage_in_block

# touch /tmp/mytestfile1
# touch /tmp/mytestfile2
# touch /tmp/mytestfile3
# touch /tmp/mytestfile4
touch: cannot touch `/tmp/mytestfile4': Disk quota exceeded

The disk_cgroup is easily extended to manage complex objects
of filesystem.

BTW, I don't know how to submit a "useful" patch to kernel community. Or,
maybe the patch is not useful at all and maybe has been developed by other
developers. I very appreciate if both of experts could give me some commend.
I will continue to develop cgroup-related codes to make contribution to kernel
development.

Any comment is welcome,

Anqin

[-- Attachment #2: linux-2.6.28.5-cgroup-disk-quota.patch --]
[-- Type: application/octet-stream, Size: 25421 bytes --]

diff -Naur linux-2.6.28.5/fs/ext2/balloc.c linux-2.6.28.5-cgroup-disk-quota/fs/ext2/balloc.c
--- linux-2.6.28.5/fs/ext2/balloc.c	2009-02-13 01:51:15.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/fs/ext2/balloc.c	2009-02-19 06:50:52.000000000 +0800
@@ -16,7 +16,7 @@
 #include <linux/sched.h>
 #include <linux/buffer_head.h>
 #include <linux/capability.h>
-
+#include <linux/cgroup_disk.h>
 /*
  * balloc.c contains the blocks allocation and deallocation routines
  */
@@ -571,6 +571,8 @@
 	brelse(bitmap_bh);
 	release_blocks(sb, freed);
 	DQUOT_FREE_BLOCK(inode, freed);
+        disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+                freed << inode->i_sb->s_blocksize_bits);
 }
 
 /**
@@ -1247,11 +1249,15 @@
 	/*
 	 * Check quota for allocation of this block.
 	 */
-	if (DQUOT_ALLOC_BLOCK(inode, num)) {
+	if (DQUOT_ALLOC_BLOCK(inode, num) 
+		|| disk_cgroup_check_quota(DISK_CURRENT_BLOCK,
+			num << inode->i_sb->s_blocksize_bits)) {
 		*errp = -EDQUOT;
 		return 0;
 	}
 
+        disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,1,
+                  num << inode->i_sb->s_blocksize_bits);
 	sbi = EXT2_SB(sb);
 	es = EXT2_SB(sb)->s_es;
 	ext2_debug("goal=%lu.\n", goal);
@@ -1295,7 +1301,6 @@
 	 * turn off reservation for this allocation
 	 */
 	if (my_rsv && (free_blocks < windowsz)
-		&& (free_blocks > 0)
 		&& (rsv_is_empty(&my_rsv->rsv_window)))
 		my_rsv = NULL;
 
@@ -1333,7 +1338,7 @@
 		 * free blocks is less than half of the reservation
 		 * window size.
 		 */
-		if (my_rsv && (free_blocks <= (windowsz/2)))
+		if (free_blocks <= (windowsz/2))
 			continue;
 
 		brelse(bitmap_bh);
@@ -1410,6 +1415,8 @@
 	*errp = 0;
 	brelse(bitmap_bh);
 	DQUOT_FREE_BLOCK(inode, *count-num);
+        disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+                 (*count-num) << inode->i_sb->s_blocksize_bits);
 	*count = num;
 	return ret_block;
 
@@ -1419,8 +1426,11 @@
 	/*
 	 * Undo the block allocation
 	 */
-	if (!performed_allocation)
+	if (!performed_allocation) {
 		DQUOT_FREE_BLOCK(inode, *count);
+                disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+                        (*count) << inode->i_sb->s_blocksize_bits);
+	}
 	brelse(bitmap_bh);
 	return 0;
 }
diff -Naur linux-2.6.28.5/fs/ext2/ialloc.c linux-2.6.28.5-cgroup-disk-quota/fs/ext2/ialloc.c
--- linux-2.6.28.5/fs/ext2/ialloc.c	2009-02-13 01:51:15.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/fs/ext2/ialloc.c	2009-02-19 06:50:51.000000000 +0800
@@ -17,6 +17,7 @@
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
 #include <linux/random.h>
+#include <linux/cgroup_disk.h>
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
@@ -123,6 +124,7 @@
 		ext2_xattr_delete_inode(inode);
 	    	DQUOT_FREE_INODE(inode);
 		DQUOT_DROP(inode);
+		disk_cgroup_acct_quota(DISK_CURRENT_INODE,0,1);
 	}
 
 	es = EXT2_SB(sb)->s_es;
@@ -587,11 +589,12 @@
 	spin_unlock(&sbi->s_next_gen_lock);
 	insert_inode_hash(inode);
 
-	if (DQUOT_ALLOC_INODE(inode)) {
+	if (DQUOT_ALLOC_INODE(inode) || disk_cgroup_check_quota(DISK_CURRENT_INODE,1)) {
 		err = -EDQUOT;
 		goto fail_drop;
 	}
 
+	disk_cgroup_acct_quota(DISK_CURRENT_INODE,1,1);
 	err = ext2_init_acl(inode, dir);
 	if (err)
 		goto fail_free_drop;
@@ -607,9 +610,11 @@
 
 fail_free_drop:
 	DQUOT_FREE_INODE(inode);
+	disk_cgroup_acct_quota(DISK_CURRENT_INODE,0,1);
 
 fail_drop:
 	DQUOT_DROP(inode);
+	disk_cgroup_acct_quota(DISK_CURRENT_INODE,0,1);
 	inode->i_flags |= S_NOQUOTA;
 	inode->i_nlink = 0;
 	iput(inode);
diff -Naur linux-2.6.28.5/fs/ext2/xattr.c linux-2.6.28.5-cgroup-disk-quota/fs/ext2/xattr.c
--- linux-2.6.28.5/fs/ext2/xattr.c	2009-02-13 01:51:15.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/fs/ext2/xattr.c	2009-02-19 06:50:51.000000000 +0800
@@ -60,6 +60,7 @@
 #include <linux/mbcache.h>
 #include <linux/quotaops.h>
 #include <linux/rwsem.h>
+#include <linux/cgroup_disk.h>
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
@@ -640,12 +641,17 @@
 				/* The old block is released after updating
 				   the inode.  */
 				ea_bdebug(new_bh, "reusing block");
-
 				error = -EDQUOT;
-				if (DQUOT_ALLOC_BLOCK(inode, 1)) {
+				if (DQUOT_ALLOC_BLOCK(inode, 1)
+					||disk_cgroup_check_quota(DISK_CURRENT_BLOCK,
+						1 << inode->i_sb->s_blocksize_bits)) {
 					unlock_buffer(new_bh);
 					goto cleanup;
 				}
+
+                                disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,1,
+                                        1 << inode->i_sb->s_blocksize_bits);
+
 				le32_add_cpu(&HDR(new_bh)->h_refcount, 1);
 				ea_bdebug(new_bh, "refcount now=%d",
 					le32_to_cpu(HDR(new_bh)->h_refcount));
@@ -698,8 +704,11 @@
 		 * written (only some dirty data were not) so we just proceed
 		 * as if nothing happened and cleanup the unused block */
 		if (error && error != -ENOSPC) {
-			if (new_bh && new_bh != old_bh)
+			if (new_bh && new_bh != old_bh) {
 				DQUOT_FREE_BLOCK(inode, 1);
+		                disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+                		        1 << inode->i_sb->s_blocksize_bits);
+			}
 			goto cleanup;
 		}
 	} else
@@ -732,6 +741,8 @@
 			if (ce)
 				mb_cache_entry_release(ce);
 			DQUOT_FREE_BLOCK(inode, 1);
+	                disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+        	                1 << inode->i_sb->s_blocksize_bits);
 			mark_buffer_dirty(old_bh);
 			ea_bdebug(old_bh, "refcount now=%d",
 				le32_to_cpu(HDR(old_bh)->h_refcount));
diff -Naur linux-2.6.28.5/fs/ext3/balloc.c linux-2.6.28.5-cgroup-disk-quota/fs/ext3/balloc.c
--- linux-2.6.28.5/fs/ext3/balloc.c	2009-02-13 01:51:15.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/fs/ext3/balloc.c	2009-02-19 06:51:07.000000000 +0800
@@ -20,6 +20,8 @@
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
 
+#include <linux/cgroup_disk.h>
+
 /*
  * balloc.c contains the blocks allocation and deallocation routines
  */
@@ -675,8 +677,11 @@
 		return;
 	}
 	ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
-	if (dquot_freed_blocks)
+	if (dquot_freed_blocks) {
 		DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
+	        disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+		dquot_freed_blocks << inode->i_sb->s_blocksize_bits);
+	}
 	return;
 }
 
@@ -1502,10 +1507,14 @@
 	/*
 	 * Check quota for allocation of this block.
 	 */
-	if (DQUOT_ALLOC_BLOCK(inode, num)) {
+	if (DQUOT_ALLOC_BLOCK(inode, num) 
+		|| disk_cgroup_check_quota(DISK_CURRENT_BLOCK,
+			num << inode->i_sb->s_blocksize_bits)) {
 		*errp = -EDQUOT;
 		return 0;
 	}
+	
+	disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,1,num << inode->i_sb->s_blocksize_bits);
 
 	sbi = EXT3_SB(sb);
 	es = EXT3_SB(sb)->s_es;
@@ -1547,7 +1556,6 @@
 	 * turn off reservation for this allocation
 	 */
 	if (my_rsv && (free_blocks < windowsz)
-		&& (free_blocks > 0)
 		&& (rsv_is_empty(&my_rsv->rsv_window)))
 		my_rsv = NULL;
 
@@ -1586,7 +1594,7 @@
 		 * free blocks is less than half of the reservation
 		 * window size.
 		 */
-		if (my_rsv && (free_blocks <= (windowsz/2)))
+		if (free_blocks <= (windowsz/2))
 			continue;
 
 		brelse(bitmap_bh);
@@ -1715,6 +1723,8 @@
 	*errp = 0;
 	brelse(bitmap_bh);
 	DQUOT_FREE_BLOCK(inode, *count-num);
+	disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+		(*count-num) << inode->i_sb->s_blocksize_bits);
 	*count = num;
 	return ret_block;
 
@@ -1728,8 +1738,11 @@
 	/*
 	 * Undo the block allocation
 	 */
-	if (!performed_allocation)
+	if (!performed_allocation) {
 		DQUOT_FREE_BLOCK(inode, *count);
+		disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+			(*count) << inode->i_sb->s_blocksize_bits);
+	}
 	brelse(bitmap_bh);
 	return 0;
 }
diff -Naur linux-2.6.28.5/fs/ext3/ialloc.c linux-2.6.28.5-cgroup-disk-quota/fs/ext3/ialloc.c
--- linux-2.6.28.5/fs/ext3/ialloc.c	2009-02-13 01:51:15.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/fs/ext3/ialloc.c	2009-02-19 06:51:05.000000000 +0800
@@ -25,6 +25,7 @@
 #include <linux/bitops.h>
 
 #include <asm/byteorder.h>
+#include <linux/cgroup_disk.h>
 
 #include "xattr.h"
 #include "acl.h"
@@ -126,6 +127,7 @@
 	DQUOT_INIT(inode);
 	ext3_xattr_delete_inode(handle, inode);
 	DQUOT_FREE_INODE(inode);
+	disk_cgroup_acct_quota(DISK_CURRENT_INODE,0,1);
 	DQUOT_DROP(inode);
 
 	is_directory = S_ISDIR(inode->i_mode);
@@ -590,11 +592,13 @@
 		sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0;
 
 	ret = inode;
-	if(DQUOT_ALLOC_INODE(inode)) {
+	if(DQUOT_ALLOC_INODE(inode) || disk_cgroup_check_quota(DISK_CURRENT_INODE,1)) {
 		err = -EDQUOT;
 		goto fail_drop;
 	}
 
+	disk_cgroup_acct_quota(DISK_CURRENT_INODE,1,1);
+
 	err = ext3_init_acl(handle, inode, dir);
 	if (err)
 		goto fail_free_drop;
@@ -622,6 +626,7 @@
 
 fail_free_drop:
 	DQUOT_FREE_INODE(inode);
+	disk_cgroup_acct_quota(DISK_CURRENT_INODE,0,1);
 
 fail_drop:
 	DQUOT_DROP(inode);
diff -Naur linux-2.6.28.5/fs/ext3/xattr.c linux-2.6.28.5-cgroup-disk-quota/fs/ext3/xattr.c
--- linux-2.6.28.5/fs/ext3/xattr.c	2009-02-13 01:51:15.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/fs/ext3/xattr.c	2009-02-19 06:51:06.000000000 +0800
@@ -58,6 +58,7 @@
 #include <linux/mbcache.h>
 #include <linux/quotaops.h>
 #include <linux/rwsem.h>
+#include <linux/cgroup_disk.h>
 #include "xattr.h"
 #include "acl.h"
 
@@ -499,6 +500,8 @@
 		if (IS_SYNC(inode))
 			handle->h_sync = 1;
 		DQUOT_FREE_BLOCK(inode, 1);
+		disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+			1 << inode->i_sb->s_blocksize_bits);
 		ea_bdebug(bh, "refcount now=%d; releasing",
 			  le32_to_cpu(BHDR(bh)->h_refcount));
 		if (ce)
@@ -773,9 +776,16 @@
 			else {
 				/* The old block is released after updating
 				   the inode. */
+
 				error = -EDQUOT;
-				if (DQUOT_ALLOC_BLOCK(inode, 1))
+				if (DQUOT_ALLOC_BLOCK(inode, 1) 
+					|| disk_cgroup_check_quota(DISK_CURRENT_BLOCK,
+					1 << inode->i_sb->s_blocksize_bits))
 					goto cleanup;
+
+                                disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,1,
+                                        1 << inode->i_sb->s_blocksize_bits);
+
 				error = ext3_journal_get_write_access(handle,
 								      new_bh);
 				if (error)
@@ -849,6 +859,9 @@
 
 cleanup_dquot:
 	DQUOT_FREE_BLOCK(inode, 1);
+        disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+                 1 << inode->i_sb->s_blocksize_bits);
+
 	goto cleanup;
 
 bad_block:
diff -Naur linux-2.6.28.5/include/linux/cgroup_disk.h linux-2.6.28.5-cgroup-disk-quota/include/linux/cgroup_disk.h
--- linux-2.6.28.5/include/linux/cgroup_disk.h	1970-01-01 08:00:00.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/include/linux/cgroup_disk.h	2009-02-19 06:48:51.000000000 +0800
@@ -0,0 +1,28 @@
+#ifndef CGROUP_DISK_H
+#define	CGROUP_DISK_H
+
+#include <linux/quota.h>
+
+enum {
+	DISK_MAX_USAGE_BLOCK,
+	DISK_CURRENT_BLOCK,
+	DISK_LIMIT_BLOCK,
+
+	DISK_MAX_USAGE_INODE,
+	DISK_CURRENT_INODE,
+	DISK_LIMIT_INODE,
+
+	DISK_USAGE_STAT,
+};
+
+#ifdef CONFIG_CGROUP_DISK
+extern void disk_cgroup_acct_stat(struct dqstats *pstat);
+extern void disk_cgroup_acct_quota(int dq_type, int inc, unsigned long long number);
+extern int disk_cgroup_check_quota(int dq_type, unsigned long long number);
+#else
+static inline void disk_cgroup_acct_stat(struct dqstats *pstat) { }
+static inline void disk_cgroup_acct_quota(int dq_type, int inc, unsigned long long number) { }
+static inline int disk_cgroup_check_quota(int dq_type, unsigned long long number) { }
+#endif /* CONFIG_CGROUP_DISKQUOTA */
+
+#endif
diff -Naur linux-2.6.28.5/include/linux/cgroup_subsys.h linux-2.6.28.5-cgroup-disk-quota/include/linux/cgroup_subsys.h
--- linux-2.6.28.5/include/linux/cgroup_subsys.h	2009-02-13 01:51:15.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/include/linux/cgroup_subsys.h	2009-02-19 06:48:52.000000000 +0800
@@ -53,4 +53,8 @@
 SUBSYS(freezer)
 #endif
 
+#ifdef CONFIG_CGROUP_DISK
+SUBSYS(disk_cgroup)
+#endif
+
 /* */
diff -Naur linux-2.6.28.5/init/Kconfig linux-2.6.28.5-cgroup-disk-quota/init/Kconfig
--- linux-2.6.28.5/init/Kconfig	2009-02-13 01:51:15.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/init/Kconfig	2009-02-19 06:50:43.000000000 +0800
@@ -313,6 +313,16 @@
 	  Provides a cgroup implementing whitelists for devices which
 	  a process in the cgroup can mknod or open.
 
+
+config CGROUP_DISK
+        bool "Enable cgroup disk quota limitinig (EXPERIMENTAL)"
+        depends on EXPERIMENTAL && CGROUPS
+        help
+          This allows to define disk quota limiting/shaping rules for
+          specific cgroup(s).
+
+          Say N if unsure.
+
 config CPUSETS
 	bool "Cpuset support"
 	depends on SMP && CGROUPS
diff -Naur linux-2.6.28.5/kernel/cgroup_disk.c linux-2.6.28.5-cgroup-disk-quota/kernel/cgroup_disk.c
--- linux-2.6.28.5/kernel/cgroup_disk.c	1970-01-01 08:00:00.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/kernel/cgroup_disk.c	2009-02-19 07:41:30.000000000 +0800
@@ -0,0 +1,397 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * Writen by An Qin <anqin.qin@gmail.com>
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/cgroup.h>
+#include <linux/slab.h>
+#include <linux/gfp.h>
+#include <linux/err.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/jiffies.h>
+#include <linux/spinlock.h>
+#include <linux/quota.h>
+
+#include <linux/cgroup_disk.h>
+
+
+struct disk_cgroup_stat
+{
+        int lookups;
+        int drops;
+        int reads;
+        int writes;
+        int cache_hits;
+        int allocated_dquots;
+        int free_dquots;
+        int syncs;
+};
+
+struct disk_cgroup_quota 
+{
+        qsize_t dqb_bhardlimit;  
+        qsize_t dqb_bsoftlimit;  
+        qsize_t dqb_curspace;  
+        qsize_t dqb_ihardlimit;  
+        qsize_t dqb_isoftlimit;  
+        qsize_t dqb_curinodes;   
+        time_t dqb_btime;      
+        time_t dqb_itime;      
+};
+
+
+struct disk_cgroup
+{
+	struct cgroup_subsys_state css;
+	spinlock_t lock;
+	struct disk_cgroup_quota quota;
+	struct disk_cgroup_stat stat;
+};
+
+
+static inline struct disk_cgroup *cgroup_to_disk_cgroup(struct cgroup *cont)
+{
+	return container_of(cgroup_subsys_state(cont, disk_cgroup_subsys_id),
+			    struct disk_cgroup, css);
+}
+
+static inline struct disk_cgroup *task_to_disk_cgroup(struct task_struct *task)
+{
+	return container_of(task_subsys_state(task, disk_cgroup_subsys_id),
+			    struct disk_cgroup, css);
+}
+
+struct cgroup_subsys disk_cgroup_subsys;
+
+static struct cgroup_subsys_state *disk_cgroup_create(
+			struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	struct disk_cgroup *disk;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return ERR_PTR(-EPERM);
+
+	if (!cgroup_is_descendant(cont))
+		return ERR_PTR(-EPERM);
+	
+	disk = kzalloc(sizeof(struct disk_cgroup), GFP_KERNEL);
+	if (unlikely(!disk))
+		return ERR_PTR(-ENOMEM);
+
+	//memset(disk,0,sizeof(*disk));
+	spin_lock_init(&disk->lock);
+
+	return &disk->css;
+}
+
+static void disk_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	kfree(cgroup_to_disk_cgroup(cont));
+}
+
+
+static ssize_t disk_cgroup_read_stat(struct cgroup *cont, struct cftype *cft,
+			       struct file *file, char __user *buf,
+			       size_t nbytes, loff_t *ppos)
+{
+	ssize_t count, ret;
+        struct disk_cgroup_stat stat;
+	struct disk_cgroup *disk;
+	char *page;
+
+	page = (char *)__get_free_page(GFP_TEMPORARY);
+	if (!page)
+		return -ENOMEM;
+
+	cgroup_lock();
+	if (cgroup_is_removed(cont)) {
+		cgroup_unlock();
+		ret = -ENODEV;
+		goto out;
+	}
+
+	disk = cgroup_to_disk_cgroup(cont);
+	spin_lock_irq(&disk->lock);
+
+	/* may cause segment fault, pay attention */
+	memcpy(&stat,&(disk->stat),sizeof(stat));
+
+	spin_unlock_irq(&disk->lock);
+	cgroup_unlock();
+
+	/* print additional debugging stuff */
+	count = sprintf(page, 	"            type: %s\n"
+			      	"         lookups: %d\n"
+			      	"           drops: %d\n"
+			      	"           reads: %d\n"
+			      	"          writes: %d\n"
+				"      cache_hits: %d\n"
+				"allocated_dquots: %d\n"
+				"     free_dquots: %d\n"
+				"           syncs: %d\n",
+			      	cft->name,
+			      	stat.lookups, stat.drops, stat.reads, 
+				stat.writes, stat.cache_hits,
+				stat.allocated_dquots, stat.free_dquots,
+				stat.syncs);
+
+	ret = simple_read_from_buffer(buf, nbytes, ppos, page, count);
+
+out:
+	free_page((unsigned long)page);
+	return ret;
+}
+
+static ssize_t disk_cgroup_read_quota(struct cgroup *cont, struct cftype *cft,
+                               struct file *file, char __user *buf,
+                               size_t nbytes, loff_t *ppos)
+{
+        ssize_t count, ret = 0;
+        struct disk_cgroup_quota quota;
+        struct disk_cgroup_stat stat;
+        struct disk_cgroup *disk;
+        char *page;
+
+        page = (char *)__get_free_page(GFP_TEMPORARY);
+        if (!page)
+                return -ENOMEM;
+
+        cgroup_lock();
+        if (cgroup_is_removed(cont)) {
+                cgroup_unlock();
+                ret = -ENODEV;
+                goto out;
+        }
+
+        disk = cgroup_to_disk_cgroup(cont);
+        spin_lock_irq(&disk->lock);
+
+        /* may cause segment fault, pay attention */
+        memcpy(&quota,&(disk->quota),sizeof(quota));
+        memcpy(&stat,&(disk->stat),sizeof(stat));
+
+        spin_unlock_irq(&disk->lock);
+        cgroup_unlock();
+
+	switch(cft->private) {
+		case DISK_USAGE_STAT:
+        	/* print additional debugging stuff */
+	        count = sprintf(page,   "            type: %s\n"
+        	                        "         lookups: %d\n"
+                	                "           drops: %d\n"
+                        	        "           reads: %d\n"
+	                                "          writes: %d\n"
+        	                        "      cache_hits: %d\n"
+                	                "allocated_dquots: %d\n"
+                        	        "     free_dquots: %d\n"
+	                                "           syncs: %d\n",
+	                                cft->name,
+        	                        stat.lookups, stat.drops, stat.reads,
+                	                stat.writes, stat.cache_hits,
+                        	        stat.allocated_dquots, stat.free_dquots,
+	                                stat.syncs);
+		break;
+		case DISK_CURRENT_BLOCK:
+                count = sprintf(page,   "current usage of block: %llu\n",
+					quota.dqb_curspace);
+		break;
+		case DISK_CURRENT_INODE:
+                count = sprintf(page,   "current usage of inode: %llu\n",
+                                        quota.dqb_curinodes);
+		break;
+		case DISK_MAX_USAGE_BLOCK:
+                count = sprintf(page,   "%llu\n",
+                                        quota.dqb_bhardlimit);
+		break;
+		case DISK_MAX_USAGE_INODE:
+                count = sprintf(page,   "%llu\n",
+                                        quota.dqb_ihardlimit);
+		break;
+		case DISK_LIMIT_BLOCK:
+                count = sprintf(page,   "%llu\n",
+                                        quota.dqb_bsoftlimit);
+		break;
+		case DISK_LIMIT_INODE:
+                count = sprintf(page,   "%llu\n",
+                                        quota.dqb_isoftlimit);
+                break;
+		default:
+			goto out;
+
+	}
+        ret = simple_read_from_buffer(buf, nbytes, ppos, page, count);
+
+out:
+        free_page((unsigned long)page);
+        return ret;
+}
+
+
+static int disk_cgroup_write_u64(struct cgroup *cont, struct cftype *cft,
+				 u64 val)
+{
+	struct disk_cgroup *disk;
+	int ret = 0;
+
+	cgroup_lock();
+	if (cgroup_is_removed(cont)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	disk = cgroup_to_disk_cgroup(cont);
+
+	spin_lock_irq(&disk->lock);
+	switch(cft->private)
+	{
+		case DISK_MAX_USAGE_BLOCK:
+			disk->quota.dqb_bhardlimit = (unsigned long long) val;
+			break;
+		case DISK_MAX_USAGE_INODE:
+			disk->quota.dqb_ihardlimit = (unsigned long long) val;
+			break;
+		case DISK_LIMIT_BLOCK:
+			disk->quota.dqb_bsoftlimit = (unsigned long long) val;
+			break;
+		case DISK_LIMIT_INODE:
+			disk->quota.dqb_isoftlimit = (unsigned long long) val;
+			break;
+		default:
+			break;
+	}
+	spin_unlock_irq(&disk->lock);
+
+out:
+	cgroup_unlock();
+	return ret;
+}
+
+
+static struct cftype disk_cgroup_files[] = {
+	{ 
+		.name = "stat", 
+		.read = disk_cgroup_read_stat,
+		.private = DISK_USAGE_STAT,
+	},
+        {
+                .name = "usage_in_block",
+                .read = disk_cgroup_read_quota,
+                .private = DISK_CURRENT_BLOCK,
+        },
+        {
+                .name = "usage_in_inode",
+                .read = disk_cgroup_read_quota,
+                .private = DISK_CURRENT_INODE,
+        },
+        {
+                .name = "max_usage_in_block",
+                .read = disk_cgroup_read_quota,
+                .write_u64 = disk_cgroup_write_u64,
+                .private = DISK_MAX_USAGE_BLOCK,
+        },
+        {
+                .name = "max_usage_in_inode",
+                .read = disk_cgroup_read_quota,
+                .write_u64 = disk_cgroup_write_u64,
+                .private = DISK_MAX_USAGE_INODE,
+        },
+        {
+                .name = "limit_in_block",
+                .read = disk_cgroup_read_quota,
+                .write_u64 = disk_cgroup_write_u64,
+                .private = DISK_LIMIT_BLOCK,
+        },
+        {
+                .name = "limit_in_inode",
+                .read = disk_cgroup_read_quota,
+                .write_u64 = disk_cgroup_write_u64,
+                .private = DISK_LIMIT_INODE,
+        },
+};
+
+static int disk_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	return cgroup_add_files(cont, ss, disk_cgroup_files, ARRAY_SIZE(disk_cgroup_files));
+}
+
+struct cgroup_subsys disk_cgroup_subsys = {
+	.name = "disk",
+	.create = disk_cgroup_create,
+	.destroy = disk_cgroup_destroy,
+	.populate = disk_cgroup_populate,
+	.subsys_id = disk_cgroup_subsys_id,
+};
+
+void disk_cgroup_acct_stat(struct dqstats *pstat)
+{
+	struct disk_cgroup *disk;
+
+	disk = task_to_disk_cgroup(current);
+	if (!disk)
+		return;
+
+	disk->stat.lookups += pstat->lookups;
+	disk->stat.drops += pstat->drops;
+	disk->stat.reads += pstat->reads;
+	disk->stat.writes += pstat->writes;
+	disk->stat.cache_hits += pstat->cache_hits;
+	disk->stat.allocated_dquots += pstat->allocated_dquots;
+	disk->stat.free_dquots += pstat->free_dquots;
+	disk->stat.syncs += pstat->syncs;
+}
+EXPORT_SYMBOL(disk_cgroup_acct_stat);
+
+void disk_cgroup_acct_quota(int dq_type, int inc, unsigned long long number)
+{
+        struct disk_cgroup *disk;
+
+        disk = task_to_disk_cgroup(current);
+        if (!disk)
+                return;
+
+	if(dq_type == DISK_CURRENT_BLOCK ) {
+		if(inc)
+        		disk->quota.dqb_curspace += number;
+		else if(disk->quota.dqb_curspace > number)
+			disk->quota.dqb_curspace -= number;
+		else disk->quota.dqb_curspace = 0;
+	}
+	else if(dq_type == DISK_CURRENT_INODE) {
+		if(inc)
+			disk->quota.dqb_curinodes += number;
+		else if(disk->quota.dqb_curinodes > number)
+			disk->quota.dqb_curinodes -= number;
+		else disk->quota.dqb_curinodes = 0;
+	}
+}
+EXPORT_SYMBOL(disk_cgroup_acct_quota);
+
+int disk_cgroup_check_quota(int dq_type, unsigned long long number)
+{
+        struct disk_cgroup *disk;
+	int ret = 0;
+
+        disk = task_to_disk_cgroup(current);
+        if (!disk)
+                return ret;
+
+
+        if(dq_type == DISK_CURRENT_BLOCK 
+		&& disk->quota.dqb_bhardlimit > 0
+		&& disk->quota.dqb_curspace + number > disk->quota.dqb_bhardlimit)
+        	ret = -1;
+        else if(dq_type == DISK_CURRENT_INODE
+                && disk->quota.dqb_ihardlimit > 0
+                && disk->quota.dqb_curinodes + number > disk->quota.dqb_ihardlimit)
+                ret = -1;
+
+	return ret;
+}
+EXPORT_SYMBOL(disk_cgroup_check_quota);
diff -Naur linux-2.6.28.5/kernel/cgroup_disk.h linux-2.6.28.5-cgroup-disk-quota/kernel/cgroup_disk.h
--- linux-2.6.28.5/kernel/cgroup_disk.h	1970-01-01 08:00:00.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/kernel/cgroup_disk.h	2009-02-19 06:52:02.000000000 +0800
@@ -0,0 +1,26 @@
+#ifndef CGROUP_DISK_H
+#define	CGROUP_DISK_H
+
+#include <linux/quota.h>
+
+enum {
+	DISK_MAX_USAGE_BLOCK,
+	DISK_CURRENT_BLOCK,
+	DISK_LIMIT_BLOCK,
+
+	DISK_MAX_USAGE_INODE,
+	DISK_CURRENT_INODE,
+	DISK_LIMIT_INODE,
+
+	DISK_USAGE_STAT,
+};
+
+#ifdef CONFIG_CGROUP_DISK
+extern void disk_cgroup_acct_stat(struct dqstats *pstat);
+extern void disk_cgroup_acct_quota(int dq_type, int inc, unsigned long number);
+#else
+static inline void disk_cgroup_acct_stat(struct dqstats *pstat) { }
+static inline void disk_cgroup_acct_quota(int dq_type, int inc, unsigned long number) { }
+#endif /* CONFIG_CGROUP_DISKQUOTA */
+
+#endif
diff -Naur linux-2.6.28.5/kernel/Makefile linux-2.6.28.5-cgroup-disk-quota/kernel/Makefile
--- linux-2.6.28.5/kernel/Makefile	2009-02-13 01:51:15.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/kernel/Makefile	2009-02-19 06:52:04.000000000 +0800
@@ -55,6 +55,7 @@
 obj-$(CONFIG_CGROUPS) += cgroup.o
 obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
 obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o
+obj-$(CONFIG_CGROUP_FREEZER) += cgroup_disk.o
 obj-$(CONFIG_CPUSETS) += cpuset.o
 obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
 obj-$(CONFIG_UTS_NS) += utsname.o

[-- Attachment #3: Type: text/plain, Size: 206 bytes --]

_______________________________________________
Containers mailing list
Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
https://lists.linux-foundation.org/mailman/listinfo/containers

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Fwd: [PATCH] cgroup for disk quota
       [not found] ` <d95d44a20902200028h1e229cc0pa3cdd4f42814e78e-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2009-02-20 10:21   ` anqin
       [not found]     ` <d95d44a20902200221w67ee1b49ua6027f3090186af9-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2009-02-20 13:45   ` Daniel Lezcano
  1 sibling, 1 reply; 9+ messages in thread
From: anqin @ 2009-02-20 10:21 UTC (permalink / raw)
  To: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	menage-hpIqsD4AKlfQT0dZR+AlfA

[-- Attachment #1: Type: text/plain, Size: 1954 bytes --]

---------- Forwarded message ----------
From: anqin <anqin.qin-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Date: Fri, Feb 20, 2009 at 4:28 PM
Subject: [PATCH] cgroup for disk quota
To: Daniel Lezcano <dlezcano-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>, "Serge E. Hallyn"
<serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>, containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org
Cc: Ian jonhson <jonhson.ian-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>


Dear all,

For unified management of resources (CPU, memory, disk, network),
I (and Ian) developed a cgroup subsystem to control the usage
of disk quota.

The subsystem for disk quota (disk_cgroup, to be brief) does accounting
of inode and block allocated by ext3/ext2 filesystem. Simarily as
filesystem quota, the disk_cgroup can do limitation but without needing
to open filesytem quota options (e.g. usrquota,grpquota in /etc/fstab).
Since this patch is first developed, it needs more feedback and testing
from other developers or users.

The simple usage of disk_cgroup is as follows:

# mount -t cgroup cgroup /mnt/cgrp
# lxc-execute -n lxc-template.conf /bin/bash
# ls /mnt/cgrp/11457/           // <--  11457 is the pid of bash
...
disk.stat
disk.usage_in_inode
disk.usage_in_block
disk.max_usage_in_inode
disk.max_usage_in_block
disk.limit_in_inode
disk.limit_in_inode
...

# echo  3 > /mnt/cgrp/11457/disk.max_usage_in_block

# touch /tmp/mytestfile1
# touch /tmp/mytestfile2
# touch /tmp/mytestfile3
# touch /tmp/mytestfile4
touch: cannot touch `/tmp/mytestfile4': Disk quota exceeded

The disk_cgroup is easily extended to manage complex objects
of filesystem.

BTW, I don't know how to submit a "useful" patch to kernel community. Or,
maybe the patch is not useful at all and maybe has been developed by other
developers. I very appreciate if experts could give me some commend.
I will continue to develop cgroup-related codes to make contribution to kernel
development.

Any comment is welcome,

Anqin

[-- Attachment #2: linux-2.6.28.5-cgroup-disk-quota.patch --]
[-- Type: application/octet-stream, Size: 25421 bytes --]

diff -Naur linux-2.6.28.5/fs/ext2/balloc.c linux-2.6.28.5-cgroup-disk-quota/fs/ext2/balloc.c
--- linux-2.6.28.5/fs/ext2/balloc.c	2009-02-13 01:51:15.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/fs/ext2/balloc.c	2009-02-19 06:50:52.000000000 +0800
@@ -16,7 +16,7 @@
 #include <linux/sched.h>
 #include <linux/buffer_head.h>
 #include <linux/capability.h>
-
+#include <linux/cgroup_disk.h>
 /*
  * balloc.c contains the blocks allocation and deallocation routines
  */
@@ -571,6 +571,8 @@
 	brelse(bitmap_bh);
 	release_blocks(sb, freed);
 	DQUOT_FREE_BLOCK(inode, freed);
+        disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+                freed << inode->i_sb->s_blocksize_bits);
 }
 
 /**
@@ -1247,11 +1249,15 @@
 	/*
 	 * Check quota for allocation of this block.
 	 */
-	if (DQUOT_ALLOC_BLOCK(inode, num)) {
+	if (DQUOT_ALLOC_BLOCK(inode, num) 
+		|| disk_cgroup_check_quota(DISK_CURRENT_BLOCK,
+			num << inode->i_sb->s_blocksize_bits)) {
 		*errp = -EDQUOT;
 		return 0;
 	}
 
+        disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,1,
+                  num << inode->i_sb->s_blocksize_bits);
 	sbi = EXT2_SB(sb);
 	es = EXT2_SB(sb)->s_es;
 	ext2_debug("goal=%lu.\n", goal);
@@ -1295,7 +1301,6 @@
 	 * turn off reservation for this allocation
 	 */
 	if (my_rsv && (free_blocks < windowsz)
-		&& (free_blocks > 0)
 		&& (rsv_is_empty(&my_rsv->rsv_window)))
 		my_rsv = NULL;
 
@@ -1333,7 +1338,7 @@
 		 * free blocks is less than half of the reservation
 		 * window size.
 		 */
-		if (my_rsv && (free_blocks <= (windowsz/2)))
+		if (free_blocks <= (windowsz/2))
 			continue;
 
 		brelse(bitmap_bh);
@@ -1410,6 +1415,8 @@
 	*errp = 0;
 	brelse(bitmap_bh);
 	DQUOT_FREE_BLOCK(inode, *count-num);
+        disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+                 (*count-num) << inode->i_sb->s_blocksize_bits);
 	*count = num;
 	return ret_block;
 
@@ -1419,8 +1426,11 @@
 	/*
 	 * Undo the block allocation
 	 */
-	if (!performed_allocation)
+	if (!performed_allocation) {
 		DQUOT_FREE_BLOCK(inode, *count);
+                disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+                        (*count) << inode->i_sb->s_blocksize_bits);
+	}
 	brelse(bitmap_bh);
 	return 0;
 }
diff -Naur linux-2.6.28.5/fs/ext2/ialloc.c linux-2.6.28.5-cgroup-disk-quota/fs/ext2/ialloc.c
--- linux-2.6.28.5/fs/ext2/ialloc.c	2009-02-13 01:51:15.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/fs/ext2/ialloc.c	2009-02-19 06:50:51.000000000 +0800
@@ -17,6 +17,7 @@
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
 #include <linux/random.h>
+#include <linux/cgroup_disk.h>
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
@@ -123,6 +124,7 @@
 		ext2_xattr_delete_inode(inode);
 	    	DQUOT_FREE_INODE(inode);
 		DQUOT_DROP(inode);
+		disk_cgroup_acct_quota(DISK_CURRENT_INODE,0,1);
 	}
 
 	es = EXT2_SB(sb)->s_es;
@@ -587,11 +589,12 @@
 	spin_unlock(&sbi->s_next_gen_lock);
 	insert_inode_hash(inode);
 
-	if (DQUOT_ALLOC_INODE(inode)) {
+	if (DQUOT_ALLOC_INODE(inode) || disk_cgroup_check_quota(DISK_CURRENT_INODE,1)) {
 		err = -EDQUOT;
 		goto fail_drop;
 	}
 
+	disk_cgroup_acct_quota(DISK_CURRENT_INODE,1,1);
 	err = ext2_init_acl(inode, dir);
 	if (err)
 		goto fail_free_drop;
@@ -607,9 +610,11 @@
 
 fail_free_drop:
 	DQUOT_FREE_INODE(inode);
+	disk_cgroup_acct_quota(DISK_CURRENT_INODE,0,1);
 
 fail_drop:
 	DQUOT_DROP(inode);
+	disk_cgroup_acct_quota(DISK_CURRENT_INODE,0,1);
 	inode->i_flags |= S_NOQUOTA;
 	inode->i_nlink = 0;
 	iput(inode);
diff -Naur linux-2.6.28.5/fs/ext2/xattr.c linux-2.6.28.5-cgroup-disk-quota/fs/ext2/xattr.c
--- linux-2.6.28.5/fs/ext2/xattr.c	2009-02-13 01:51:15.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/fs/ext2/xattr.c	2009-02-19 06:50:51.000000000 +0800
@@ -60,6 +60,7 @@
 #include <linux/mbcache.h>
 #include <linux/quotaops.h>
 #include <linux/rwsem.h>
+#include <linux/cgroup_disk.h>
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
@@ -640,12 +641,17 @@
 				/* The old block is released after updating
 				   the inode.  */
 				ea_bdebug(new_bh, "reusing block");
-
 				error = -EDQUOT;
-				if (DQUOT_ALLOC_BLOCK(inode, 1)) {
+				if (DQUOT_ALLOC_BLOCK(inode, 1)
+					||disk_cgroup_check_quota(DISK_CURRENT_BLOCK,
+						1 << inode->i_sb->s_blocksize_bits)) {
 					unlock_buffer(new_bh);
 					goto cleanup;
 				}
+
+                                disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,1,
+                                        1 << inode->i_sb->s_blocksize_bits);
+
 				le32_add_cpu(&HDR(new_bh)->h_refcount, 1);
 				ea_bdebug(new_bh, "refcount now=%d",
 					le32_to_cpu(HDR(new_bh)->h_refcount));
@@ -698,8 +704,11 @@
 		 * written (only some dirty data were not) so we just proceed
 		 * as if nothing happened and cleanup the unused block */
 		if (error && error != -ENOSPC) {
-			if (new_bh && new_bh != old_bh)
+			if (new_bh && new_bh != old_bh) {
 				DQUOT_FREE_BLOCK(inode, 1);
+		                disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+                		        1 << inode->i_sb->s_blocksize_bits);
+			}
 			goto cleanup;
 		}
 	} else
@@ -732,6 +741,8 @@
 			if (ce)
 				mb_cache_entry_release(ce);
 			DQUOT_FREE_BLOCK(inode, 1);
+	                disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+        	                1 << inode->i_sb->s_blocksize_bits);
 			mark_buffer_dirty(old_bh);
 			ea_bdebug(old_bh, "refcount now=%d",
 				le32_to_cpu(HDR(old_bh)->h_refcount));
diff -Naur linux-2.6.28.5/fs/ext3/balloc.c linux-2.6.28.5-cgroup-disk-quota/fs/ext3/balloc.c
--- linux-2.6.28.5/fs/ext3/balloc.c	2009-02-13 01:51:15.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/fs/ext3/balloc.c	2009-02-19 06:51:07.000000000 +0800
@@ -20,6 +20,8 @@
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
 
+#include <linux/cgroup_disk.h>
+
 /*
  * balloc.c contains the blocks allocation and deallocation routines
  */
@@ -675,8 +677,11 @@
 		return;
 	}
 	ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
-	if (dquot_freed_blocks)
+	if (dquot_freed_blocks) {
 		DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
+	        disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+		dquot_freed_blocks << inode->i_sb->s_blocksize_bits);
+	}
 	return;
 }
 
@@ -1502,10 +1507,14 @@
 	/*
 	 * Check quota for allocation of this block.
 	 */
-	if (DQUOT_ALLOC_BLOCK(inode, num)) {
+	if (DQUOT_ALLOC_BLOCK(inode, num) 
+		|| disk_cgroup_check_quota(DISK_CURRENT_BLOCK,
+			num << inode->i_sb->s_blocksize_bits)) {
 		*errp = -EDQUOT;
 		return 0;
 	}
+	
+	disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,1,num << inode->i_sb->s_blocksize_bits);
 
 	sbi = EXT3_SB(sb);
 	es = EXT3_SB(sb)->s_es;
@@ -1547,7 +1556,6 @@
 	 * turn off reservation for this allocation
 	 */
 	if (my_rsv && (free_blocks < windowsz)
-		&& (free_blocks > 0)
 		&& (rsv_is_empty(&my_rsv->rsv_window)))
 		my_rsv = NULL;
 
@@ -1586,7 +1594,7 @@
 		 * free blocks is less than half of the reservation
 		 * window size.
 		 */
-		if (my_rsv && (free_blocks <= (windowsz/2)))
+		if (free_blocks <= (windowsz/2))
 			continue;
 
 		brelse(bitmap_bh);
@@ -1715,6 +1723,8 @@
 	*errp = 0;
 	brelse(bitmap_bh);
 	DQUOT_FREE_BLOCK(inode, *count-num);
+	disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+		(*count-num) << inode->i_sb->s_blocksize_bits);
 	*count = num;
 	return ret_block;
 
@@ -1728,8 +1738,11 @@
 	/*
 	 * Undo the block allocation
 	 */
-	if (!performed_allocation)
+	if (!performed_allocation) {
 		DQUOT_FREE_BLOCK(inode, *count);
+		disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+			(*count) << inode->i_sb->s_blocksize_bits);
+	}
 	brelse(bitmap_bh);
 	return 0;
 }
diff -Naur linux-2.6.28.5/fs/ext3/ialloc.c linux-2.6.28.5-cgroup-disk-quota/fs/ext3/ialloc.c
--- linux-2.6.28.5/fs/ext3/ialloc.c	2009-02-13 01:51:15.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/fs/ext3/ialloc.c	2009-02-19 06:51:05.000000000 +0800
@@ -25,6 +25,7 @@
 #include <linux/bitops.h>
 
 #include <asm/byteorder.h>
+#include <linux/cgroup_disk.h>
 
 #include "xattr.h"
 #include "acl.h"
@@ -126,6 +127,7 @@
 	DQUOT_INIT(inode);
 	ext3_xattr_delete_inode(handle, inode);
 	DQUOT_FREE_INODE(inode);
+	disk_cgroup_acct_quota(DISK_CURRENT_INODE,0,1);
 	DQUOT_DROP(inode);
 
 	is_directory = S_ISDIR(inode->i_mode);
@@ -590,11 +592,13 @@
 		sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0;
 
 	ret = inode;
-	if(DQUOT_ALLOC_INODE(inode)) {
+	if(DQUOT_ALLOC_INODE(inode) || disk_cgroup_check_quota(DISK_CURRENT_INODE,1)) {
 		err = -EDQUOT;
 		goto fail_drop;
 	}
 
+	disk_cgroup_acct_quota(DISK_CURRENT_INODE,1,1);
+
 	err = ext3_init_acl(handle, inode, dir);
 	if (err)
 		goto fail_free_drop;
@@ -622,6 +626,7 @@
 
 fail_free_drop:
 	DQUOT_FREE_INODE(inode);
+	disk_cgroup_acct_quota(DISK_CURRENT_INODE,0,1);
 
 fail_drop:
 	DQUOT_DROP(inode);
diff -Naur linux-2.6.28.5/fs/ext3/xattr.c linux-2.6.28.5-cgroup-disk-quota/fs/ext3/xattr.c
--- linux-2.6.28.5/fs/ext3/xattr.c	2009-02-13 01:51:15.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/fs/ext3/xattr.c	2009-02-19 06:51:06.000000000 +0800
@@ -58,6 +58,7 @@
 #include <linux/mbcache.h>
 #include <linux/quotaops.h>
 #include <linux/rwsem.h>
+#include <linux/cgroup_disk.h>
 #include "xattr.h"
 #include "acl.h"
 
@@ -499,6 +500,8 @@
 		if (IS_SYNC(inode))
 			handle->h_sync = 1;
 		DQUOT_FREE_BLOCK(inode, 1);
+		disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+			1 << inode->i_sb->s_blocksize_bits);
 		ea_bdebug(bh, "refcount now=%d; releasing",
 			  le32_to_cpu(BHDR(bh)->h_refcount));
 		if (ce)
@@ -773,9 +776,16 @@
 			else {
 				/* The old block is released after updating
 				   the inode. */
+
 				error = -EDQUOT;
-				if (DQUOT_ALLOC_BLOCK(inode, 1))
+				if (DQUOT_ALLOC_BLOCK(inode, 1) 
+					|| disk_cgroup_check_quota(DISK_CURRENT_BLOCK,
+					1 << inode->i_sb->s_blocksize_bits))
 					goto cleanup;
+
+                                disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,1,
+                                        1 << inode->i_sb->s_blocksize_bits);
+
 				error = ext3_journal_get_write_access(handle,
 								      new_bh);
 				if (error)
@@ -849,6 +859,9 @@
 
 cleanup_dquot:
 	DQUOT_FREE_BLOCK(inode, 1);
+        disk_cgroup_acct_quota(DISK_CURRENT_BLOCK,0,
+                 1 << inode->i_sb->s_blocksize_bits);
+
 	goto cleanup;
 
 bad_block:
diff -Naur linux-2.6.28.5/include/linux/cgroup_disk.h linux-2.6.28.5-cgroup-disk-quota/include/linux/cgroup_disk.h
--- linux-2.6.28.5/include/linux/cgroup_disk.h	1970-01-01 08:00:00.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/include/linux/cgroup_disk.h	2009-02-19 06:48:51.000000000 +0800
@@ -0,0 +1,28 @@
+#ifndef CGROUP_DISK_H
+#define	CGROUP_DISK_H
+
+#include <linux/quota.h>
+
+enum {
+	DISK_MAX_USAGE_BLOCK,
+	DISK_CURRENT_BLOCK,
+	DISK_LIMIT_BLOCK,
+
+	DISK_MAX_USAGE_INODE,
+	DISK_CURRENT_INODE,
+	DISK_LIMIT_INODE,
+
+	DISK_USAGE_STAT,
+};
+
+#ifdef CONFIG_CGROUP_DISK
+extern void disk_cgroup_acct_stat(struct dqstats *pstat);
+extern void disk_cgroup_acct_quota(int dq_type, int inc, unsigned long long number);
+extern int disk_cgroup_check_quota(int dq_type, unsigned long long number);
+#else
+static inline void disk_cgroup_acct_stat(struct dqstats *pstat) { }
+static inline void disk_cgroup_acct_quota(int dq_type, int inc, unsigned long long number) { }
+static inline int disk_cgroup_check_quota(int dq_type, unsigned long long number) { }
+#endif /* CONFIG_CGROUP_DISKQUOTA */
+
+#endif
diff -Naur linux-2.6.28.5/include/linux/cgroup_subsys.h linux-2.6.28.5-cgroup-disk-quota/include/linux/cgroup_subsys.h
--- linux-2.6.28.5/include/linux/cgroup_subsys.h	2009-02-13 01:51:15.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/include/linux/cgroup_subsys.h	2009-02-19 06:48:52.000000000 +0800
@@ -53,4 +53,8 @@
 SUBSYS(freezer)
 #endif
 
+#ifdef CONFIG_CGROUP_DISK
+SUBSYS(disk_cgroup)
+#endif
+
 /* */
diff -Naur linux-2.6.28.5/init/Kconfig linux-2.6.28.5-cgroup-disk-quota/init/Kconfig
--- linux-2.6.28.5/init/Kconfig	2009-02-13 01:51:15.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/init/Kconfig	2009-02-19 06:50:43.000000000 +0800
@@ -313,6 +313,16 @@
 	  Provides a cgroup implementing whitelists for devices which
 	  a process in the cgroup can mknod or open.
 
+
+config CGROUP_DISK
+        bool "Enable cgroup disk quota limitinig (EXPERIMENTAL)"
+        depends on EXPERIMENTAL && CGROUPS
+        help
+          This allows to define disk quota limiting/shaping rules for
+          specific cgroup(s).
+
+          Say N if unsure.
+
 config CPUSETS
 	bool "Cpuset support"
 	depends on SMP && CGROUPS
diff -Naur linux-2.6.28.5/kernel/cgroup_disk.c linux-2.6.28.5-cgroup-disk-quota/kernel/cgroup_disk.c
--- linux-2.6.28.5/kernel/cgroup_disk.c	1970-01-01 08:00:00.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/kernel/cgroup_disk.c	2009-02-19 07:41:30.000000000 +0800
@@ -0,0 +1,397 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * 
+ * Writen by An Qin <anqin.qin@gmail.com>
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/cgroup.h>
+#include <linux/slab.h>
+#include <linux/gfp.h>
+#include <linux/err.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/jiffies.h>
+#include <linux/spinlock.h>
+#include <linux/quota.h>
+
+#include <linux/cgroup_disk.h>
+
+
+struct disk_cgroup_stat
+{
+        int lookups;
+        int drops;
+        int reads;
+        int writes;
+        int cache_hits;
+        int allocated_dquots;
+        int free_dquots;
+        int syncs;
+};
+
+struct disk_cgroup_quota 
+{
+        qsize_t dqb_bhardlimit;  
+        qsize_t dqb_bsoftlimit;  
+        qsize_t dqb_curspace;  
+        qsize_t dqb_ihardlimit;  
+        qsize_t dqb_isoftlimit;  
+        qsize_t dqb_curinodes;   
+        time_t dqb_btime;      
+        time_t dqb_itime;      
+};
+
+
+struct disk_cgroup
+{
+	struct cgroup_subsys_state css;
+	spinlock_t lock;
+	struct disk_cgroup_quota quota;
+	struct disk_cgroup_stat stat;
+};
+
+
+static inline struct disk_cgroup *cgroup_to_disk_cgroup(struct cgroup *cont)
+{
+	return container_of(cgroup_subsys_state(cont, disk_cgroup_subsys_id),
+			    struct disk_cgroup, css);
+}
+
+static inline struct disk_cgroup *task_to_disk_cgroup(struct task_struct *task)
+{
+	return container_of(task_subsys_state(task, disk_cgroup_subsys_id),
+			    struct disk_cgroup, css);
+}
+
+struct cgroup_subsys disk_cgroup_subsys;
+
+static struct cgroup_subsys_state *disk_cgroup_create(
+			struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	struct disk_cgroup *disk;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return ERR_PTR(-EPERM);
+
+	if (!cgroup_is_descendant(cont))
+		return ERR_PTR(-EPERM);
+	
+	disk = kzalloc(sizeof(struct disk_cgroup), GFP_KERNEL);
+	if (unlikely(!disk))
+		return ERR_PTR(-ENOMEM);
+
+	//memset(disk,0,sizeof(*disk));
+	spin_lock_init(&disk->lock);
+
+	return &disk->css;
+}
+
+static void disk_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	kfree(cgroup_to_disk_cgroup(cont));
+}
+
+
+static ssize_t disk_cgroup_read_stat(struct cgroup *cont, struct cftype *cft,
+			       struct file *file, char __user *buf,
+			       size_t nbytes, loff_t *ppos)
+{
+	ssize_t count, ret;
+        struct disk_cgroup_stat stat;
+	struct disk_cgroup *disk;
+	char *page;
+
+	page = (char *)__get_free_page(GFP_TEMPORARY);
+	if (!page)
+		return -ENOMEM;
+
+	cgroup_lock();
+	if (cgroup_is_removed(cont)) {
+		cgroup_unlock();
+		ret = -ENODEV;
+		goto out;
+	}
+
+	disk = cgroup_to_disk_cgroup(cont);
+	spin_lock_irq(&disk->lock);
+
+	/* may cause segment fault, pay attention */
+	memcpy(&stat,&(disk->stat),sizeof(stat));
+
+	spin_unlock_irq(&disk->lock);
+	cgroup_unlock();
+
+	/* print additional debugging stuff */
+	count = sprintf(page, 	"            type: %s\n"
+			      	"         lookups: %d\n"
+			      	"           drops: %d\n"
+			      	"           reads: %d\n"
+			      	"          writes: %d\n"
+				"      cache_hits: %d\n"
+				"allocated_dquots: %d\n"
+				"     free_dquots: %d\n"
+				"           syncs: %d\n",
+			      	cft->name,
+			      	stat.lookups, stat.drops, stat.reads, 
+				stat.writes, stat.cache_hits,
+				stat.allocated_dquots, stat.free_dquots,
+				stat.syncs);
+
+	ret = simple_read_from_buffer(buf, nbytes, ppos, page, count);
+
+out:
+	free_page((unsigned long)page);
+	return ret;
+}
+
+static ssize_t disk_cgroup_read_quota(struct cgroup *cont, struct cftype *cft,
+                               struct file *file, char __user *buf,
+                               size_t nbytes, loff_t *ppos)
+{
+        ssize_t count, ret = 0;
+        struct disk_cgroup_quota quota;
+        struct disk_cgroup_stat stat;
+        struct disk_cgroup *disk;
+        char *page;
+
+        page = (char *)__get_free_page(GFP_TEMPORARY);
+        if (!page)
+                return -ENOMEM;
+
+        cgroup_lock();
+        if (cgroup_is_removed(cont)) {
+                cgroup_unlock();
+                ret = -ENODEV;
+                goto out;
+        }
+
+        disk = cgroup_to_disk_cgroup(cont);
+        spin_lock_irq(&disk->lock);
+
+        /* may cause segment fault, pay attention */
+        memcpy(&quota,&(disk->quota),sizeof(quota));
+        memcpy(&stat,&(disk->stat),sizeof(stat));
+
+        spin_unlock_irq(&disk->lock);
+        cgroup_unlock();
+
+	switch(cft->private) {
+		case DISK_USAGE_STAT:
+        	/* print additional debugging stuff */
+	        count = sprintf(page,   "            type: %s\n"
+        	                        "         lookups: %d\n"
+                	                "           drops: %d\n"
+                        	        "           reads: %d\n"
+	                                "          writes: %d\n"
+        	                        "      cache_hits: %d\n"
+                	                "allocated_dquots: %d\n"
+                        	        "     free_dquots: %d\n"
+	                                "           syncs: %d\n",
+	                                cft->name,
+        	                        stat.lookups, stat.drops, stat.reads,
+                	                stat.writes, stat.cache_hits,
+                        	        stat.allocated_dquots, stat.free_dquots,
+	                                stat.syncs);
+		break;
+		case DISK_CURRENT_BLOCK:
+                count = sprintf(page,   "current usage of block: %llu\n",
+					quota.dqb_curspace);
+		break;
+		case DISK_CURRENT_INODE:
+                count = sprintf(page,   "current usage of inode: %llu\n",
+                                        quota.dqb_curinodes);
+		break;
+		case DISK_MAX_USAGE_BLOCK:
+                count = sprintf(page,   "%llu\n",
+                                        quota.dqb_bhardlimit);
+		break;
+		case DISK_MAX_USAGE_INODE:
+                count = sprintf(page,   "%llu\n",
+                                        quota.dqb_ihardlimit);
+		break;
+		case DISK_LIMIT_BLOCK:
+                count = sprintf(page,   "%llu\n",
+                                        quota.dqb_bsoftlimit);
+		break;
+		case DISK_LIMIT_INODE:
+                count = sprintf(page,   "%llu\n",
+                                        quota.dqb_isoftlimit);
+                break;
+		default:
+			goto out;
+
+	}
+        ret = simple_read_from_buffer(buf, nbytes, ppos, page, count);
+
+out:
+        free_page((unsigned long)page);
+        return ret;
+}
+
+
+static int disk_cgroup_write_u64(struct cgroup *cont, struct cftype *cft,
+				 u64 val)
+{
+	struct disk_cgroup *disk;
+	int ret = 0;
+
+	cgroup_lock();
+	if (cgroup_is_removed(cont)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	disk = cgroup_to_disk_cgroup(cont);
+
+	spin_lock_irq(&disk->lock);
+	switch(cft->private)
+	{
+		case DISK_MAX_USAGE_BLOCK:
+			disk->quota.dqb_bhardlimit = (unsigned long long) val;
+			break;
+		case DISK_MAX_USAGE_INODE:
+			disk->quota.dqb_ihardlimit = (unsigned long long) val;
+			break;
+		case DISK_LIMIT_BLOCK:
+			disk->quota.dqb_bsoftlimit = (unsigned long long) val;
+			break;
+		case DISK_LIMIT_INODE:
+			disk->quota.dqb_isoftlimit = (unsigned long long) val;
+			break;
+		default:
+			break;
+	}
+	spin_unlock_irq(&disk->lock);
+
+out:
+	cgroup_unlock();
+	return ret;
+}
+
+
+static struct cftype disk_cgroup_files[] = {
+	{ 
+		.name = "stat", 
+		.read = disk_cgroup_read_stat,
+		.private = DISK_USAGE_STAT,
+	},
+        {
+                .name = "usage_in_block",
+                .read = disk_cgroup_read_quota,
+                .private = DISK_CURRENT_BLOCK,
+        },
+        {
+                .name = "usage_in_inode",
+                .read = disk_cgroup_read_quota,
+                .private = DISK_CURRENT_INODE,
+        },
+        {
+                .name = "max_usage_in_block",
+                .read = disk_cgroup_read_quota,
+                .write_u64 = disk_cgroup_write_u64,
+                .private = DISK_MAX_USAGE_BLOCK,
+        },
+        {
+                .name = "max_usage_in_inode",
+                .read = disk_cgroup_read_quota,
+                .write_u64 = disk_cgroup_write_u64,
+                .private = DISK_MAX_USAGE_INODE,
+        },
+        {
+                .name = "limit_in_block",
+                .read = disk_cgroup_read_quota,
+                .write_u64 = disk_cgroup_write_u64,
+                .private = DISK_LIMIT_BLOCK,
+        },
+        {
+                .name = "limit_in_inode",
+                .read = disk_cgroup_read_quota,
+                .write_u64 = disk_cgroup_write_u64,
+                .private = DISK_LIMIT_INODE,
+        },
+};
+
+static int disk_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	return cgroup_add_files(cont, ss, disk_cgroup_files, ARRAY_SIZE(disk_cgroup_files));
+}
+
+struct cgroup_subsys disk_cgroup_subsys = {
+	.name = "disk",
+	.create = disk_cgroup_create,
+	.destroy = disk_cgroup_destroy,
+	.populate = disk_cgroup_populate,
+	.subsys_id = disk_cgroup_subsys_id,
+};
+
+void disk_cgroup_acct_stat(struct dqstats *pstat)
+{
+	struct disk_cgroup *disk;
+
+	disk = task_to_disk_cgroup(current);
+	if (!disk)
+		return;
+
+	disk->stat.lookups += pstat->lookups;
+	disk->stat.drops += pstat->drops;
+	disk->stat.reads += pstat->reads;
+	disk->stat.writes += pstat->writes;
+	disk->stat.cache_hits += pstat->cache_hits;
+	disk->stat.allocated_dquots += pstat->allocated_dquots;
+	disk->stat.free_dquots += pstat->free_dquots;
+	disk->stat.syncs += pstat->syncs;
+}
+EXPORT_SYMBOL(disk_cgroup_acct_stat);
+
+void disk_cgroup_acct_quota(int dq_type, int inc, unsigned long long number)
+{
+        struct disk_cgroup *disk;
+
+        disk = task_to_disk_cgroup(current);
+        if (!disk)
+                return;
+
+	if(dq_type == DISK_CURRENT_BLOCK ) {
+		if(inc)
+        		disk->quota.dqb_curspace += number;
+		else if(disk->quota.dqb_curspace > number)
+			disk->quota.dqb_curspace -= number;
+		else disk->quota.dqb_curspace = 0;
+	}
+	else if(dq_type == DISK_CURRENT_INODE) {
+		if(inc)
+			disk->quota.dqb_curinodes += number;
+		else if(disk->quota.dqb_curinodes > number)
+			disk->quota.dqb_curinodes -= number;
+		else disk->quota.dqb_curinodes = 0;
+	}
+}
+EXPORT_SYMBOL(disk_cgroup_acct_quota);
+
+int disk_cgroup_check_quota(int dq_type, unsigned long long number)
+{
+        struct disk_cgroup *disk;
+	int ret = 0;
+
+        disk = task_to_disk_cgroup(current);
+        if (!disk)
+                return ret;
+
+
+        if(dq_type == DISK_CURRENT_BLOCK 
+		&& disk->quota.dqb_bhardlimit > 0
+		&& disk->quota.dqb_curspace + number > disk->quota.dqb_bhardlimit)
+        	ret = -1;
+        else if(dq_type == DISK_CURRENT_INODE
+                && disk->quota.dqb_ihardlimit > 0
+                && disk->quota.dqb_curinodes + number > disk->quota.dqb_ihardlimit)
+                ret = -1;
+
+	return ret;
+}
+EXPORT_SYMBOL(disk_cgroup_check_quota);
diff -Naur linux-2.6.28.5/kernel/cgroup_disk.h linux-2.6.28.5-cgroup-disk-quota/kernel/cgroup_disk.h
--- linux-2.6.28.5/kernel/cgroup_disk.h	1970-01-01 08:00:00.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/kernel/cgroup_disk.h	2009-02-19 06:52:02.000000000 +0800
@@ -0,0 +1,26 @@
+#ifndef CGROUP_DISK_H
+#define	CGROUP_DISK_H
+
+#include <linux/quota.h>
+
+enum {
+	DISK_MAX_USAGE_BLOCK,
+	DISK_CURRENT_BLOCK,
+	DISK_LIMIT_BLOCK,
+
+	DISK_MAX_USAGE_INODE,
+	DISK_CURRENT_INODE,
+	DISK_LIMIT_INODE,
+
+	DISK_USAGE_STAT,
+};
+
+#ifdef CONFIG_CGROUP_DISK
+extern void disk_cgroup_acct_stat(struct dqstats *pstat);
+extern void disk_cgroup_acct_quota(int dq_type, int inc, unsigned long number);
+#else
+static inline void disk_cgroup_acct_stat(struct dqstats *pstat) { }
+static inline void disk_cgroup_acct_quota(int dq_type, int inc, unsigned long number) { }
+#endif /* CONFIG_CGROUP_DISKQUOTA */
+
+#endif
diff -Naur linux-2.6.28.5/kernel/Makefile linux-2.6.28.5-cgroup-disk-quota/kernel/Makefile
--- linux-2.6.28.5/kernel/Makefile	2009-02-13 01:51:15.000000000 +0800
+++ linux-2.6.28.5-cgroup-disk-quota/kernel/Makefile	2009-02-19 06:52:04.000000000 +0800
@@ -55,6 +55,7 @@
 obj-$(CONFIG_CGROUPS) += cgroup.o
 obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
 obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o
+obj-$(CONFIG_CGROUP_FREEZER) += cgroup_disk.o
 obj-$(CONFIG_CPUSETS) += cpuset.o
 obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
 obj-$(CONFIG_UTS_NS) += utsname.o

[-- Attachment #3: Type: text/plain, Size: 206 bytes --]

_______________________________________________
Containers mailing list
Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
https://lists.linux-foundation.org/mailman/listinfo/containers

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] cgroup for disk quota
       [not found]     ` <d95d44a20902200221w67ee1b49ua6027f3090186af9-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2009-02-20 10:32       ` Rolando Martins
       [not found]         ` <b6a2d2e20902200232x33204c61j30837a697d69c4d4-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Rolando Martins @ 2009-02-20 10:32 UTC (permalink / raw)
  To: anqin
  Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	menage-hpIqsD4AKlfQT0dZR+AlfA

Hi,
from the perspective of an application developer, this approach would
be perfect if we could have some IO bandwidth reservation mechanism
like disk.usage_io_usage (perhaps per disk...).

Keep the good work,
Rolando


On Fri, Feb 20, 2009 at 10:21 AM, anqin <anqin.qin-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:
> ---------- Forwarded message ----------
> From: anqin <anqin.qin-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
> Date: Fri, Feb 20, 2009 at 4:28 PM
> Subject: [PATCH] cgroup for disk quota
> To: Daniel Lezcano <dlezcano-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>, "Serge E. Hallyn"
> <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>, containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org
> Cc: Ian jonhson <jonhson.ian-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>
>
> Dear all,
>
> For unified management of resources (CPU, memory, disk, network),
> I (and Ian) developed a cgroup subsystem to control the usage
> of disk quota.
>
> The subsystem for disk quota (disk_cgroup, to be brief) does accounting
> of inode and block allocated by ext3/ext2 filesystem. Simarily as
> filesystem quota, the disk_cgroup can do limitation but without needing
> to open filesytem quota options (e.g. usrquota,grpquota in /etc/fstab).
> Since this patch is first developed, it needs more feedback and testing
> from other developers or users.
>
> The simple usage of disk_cgroup is as follows:
>
> # mount -t cgroup cgroup /mnt/cgrp
> # lxc-execute -n lxc-template.conf /bin/bash
> # ls /mnt/cgrp/11457/           // <--  11457 is the pid of bash
> ...
> disk.stat
> disk.usage_in_inode
> disk.usage_in_block
> disk.max_usage_in_inode
> disk.max_usage_in_block
> disk.limit_in_inode
> disk.limit_in_inode
> ...
>
> # echo  3 > /mnt/cgrp/11457/disk.max_usage_in_block
>
> # touch /tmp/mytestfile1
> # touch /tmp/mytestfile2
> # touch /tmp/mytestfile3
> # touch /tmp/mytestfile4
> touch: cannot touch `/tmp/mytestfile4': Disk quota exceeded
>
> The disk_cgroup is easily extended to manage complex objects
> of filesystem.
>
> BTW, I don't know how to submit a "useful" patch to kernel community. Or,
> maybe the patch is not useful at all and maybe has been developed by other
> developers. I very appreciate if experts could give me some commend.
> I will continue to develop cgroup-related codes to make contribution to kernel
> development.
>
> Any comment is welcome,
>
> Anqin
>
> _______________________________________________
> Containers mailing list
> Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
> https://lists.linux-foundation.org/mailman/listinfo/containers
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] cgroup for disk quota
       [not found]         ` <b6a2d2e20902200232x33204c61j30837a697d69c4d4-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2009-02-20 11:00           ` anqin
       [not found]             ` <d95d44a20902200300x43ef2ccfh6a66d3b81056d7fd-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: anqin @ 2009-02-20 11:00 UTC (permalink / raw)
  To: Rolando Martins
  Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	menage-hpIqsD4AKlfQT0dZR+AlfA

> from the perspective of an application developer, this approach would
> be perfect if we could have some IO bandwidth reservation mechanism
> like disk.usage_io_usage (perhaps per disk...).
>

Indeed, that is my next job (exactly, current job).

Although Paolo Valente has presented a solution of I/O bandwidth control
(see http://lwn.net/Articles/309400/), I would more like to present
bandwidth reservation mechanism, especially for data-intensive
applications (e.g. hadoop cluster).

This kind of applications take more account of the reservation of
I/O bandwidth and netword bandwidth.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] cgroup for disk quota
       [not found] ` <d95d44a20902200028h1e229cc0pa3cdd4f42814e78e-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2009-02-20 10:21   ` Fwd: " anqin
@ 2009-02-20 13:45   ` Daniel Lezcano
       [not found]     ` <499EB403.9050403-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
  1 sibling, 1 reply; 9+ messages in thread
From: Daniel Lezcano @ 2009-02-20 13:45 UTC (permalink / raw)
  To: anqin; +Cc: containers-qjLDD68F18O7TbgM5vRIOg

anqin wrote:
> Dear Daniel and Serge,
> 
> For unified management of resources (CPU, memory, disk, network),
> I (and Ian) developed a cgroup subsystem to control the usage
> of disk quota.
> 
> The subsystem for disk quota (disk_cgroup, to be brief) does accounting
> of inode and block allocated by ext3/ext2 filesystem. Simarily as
> filesystem quota, the disk_cgroup can do limitation but without needing
> to open filesytem quota options (e.g. usrquota,grpquota in /etc/fstab).
> Since this patch is first developed, it needs more feedback and testing
> from other developers or users.


Cool, that looks like a very interesting feature :)

> The simple usage of disk_cgroup is as follows:
> 
> # mount -t cgroup cgroup /mnt/cgrp
> # lxc-execute -n lxc-template.conf /bin/bash
> # ls /mnt/cgrp/11457/		// <--  11457 is the pid of bash
> ...
> disk.stat
> disk.usage_in_inode
> disk.usage_in_block
> disk.max_usage_in_inode
> disk.max_usage_in_block
> disk.limit_in_inode
> disk.limit_in_inode
> ...
> 
> # echo  3 > /mnt/cgrp/11457/disk.max_usage_in_block
> 
> # touch /tmp/mytestfile1
> # touch /tmp/mytestfile2
> # touch /tmp/mytestfile3
> # touch /tmp/mytestfile4
> touch: cannot touch `/tmp/mytestfile4': Disk quota exceeded
> 
> The disk_cgroup is easily extended to manage complex objects
> of filesystem.
> 
> BTW, I don't know how to submit a "useful" patch to kernel community. Or,
> maybe the patch is not useful at all and maybe has been developed by other
> developers. I very appreciate if both of experts could give me some commend.
> I will continue to develop cgroup-related codes to make contribution to kernel
> development.

I am not sure I understand what you mean by "useful", but it seems you 
did an interesting feature. All the documentation related to submitting 
patches is in Documentation/SubmittingPatches, I hope that helps.

send your patches to lkml@ prefixed with [RFC] in the subject ('quilt' 
is your friend). If the functionality already exists, someone will tell you.

Thanks.
  -- Daniel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] cgroup for disk quota
       [not found]             ` <d95d44a20902200300x43ef2ccfh6a66d3b81056d7fd-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2009-02-20 13:51               ` Daniel Lezcano
       [not found]                 ` <499EB56B.6030502-GANU6spQydw@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Daniel Lezcano @ 2009-02-20 13:51 UTC (permalink / raw)
  To: anqin
  Cc: menage-hpIqsD4AKlfQT0dZR+AlfA,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

anqin wrote:
>> from the perspective of an application developer, this approach would
>> be perfect if we could have some IO bandwidth reservation mechanism
>> like disk.usage_io_usage (perhaps per disk...).
>>
>>     
>
> Indeed, that is my next job (exactly, current job).
>
> Although Paolo Valente has presented a solution of I/O bandwidth control
> (see http://lwn.net/Articles/309400/), I would more like to present
> bandwidth reservation mechanism, especially for data-intensive
> applications (e.g. hadoop cluster).
>
> This kind of applications take more account of the reservation of
> I/O bandwidth and netword bandwidth.
>   
Does this feature do what you want for network bandwidth ?

http://lwn.net/Articles/291161/

Option in the kernel:

Networking Support
  -> Networking options
    -> QoS and/or fair queueing
      -> Control Group Classifier

I didn't tested it yet ... :)

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] cgroup for disk quota
       [not found]                 ` <499EB56B.6030502-GANU6spQydw@public.gmane.org>
@ 2009-02-20 16:14                   ` anqin
  0 siblings, 0 replies; 9+ messages in thread
From: anqin @ 2009-02-20 16:14 UTC (permalink / raw)
  To: Daniel Lezcano
  Cc: menage-hpIqsD4AKlfQT0dZR+AlfA,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

>> Although Paolo Valente has presented a solution of I/O bandwidth control
>> (see http://lwn.net/Articles/309400/), I would more like to present
>> bandwidth reservation mechanism, especially for data-intensive
>> applications (e.g. hadoop cluster).
>>
>> This kind of applications take more account of the reservation of
>> I/O bandwidth and netword bandwidth.
>>
>
> Does this feature do what you want for network bandwidth ?
>
> http://lwn.net/Articles/291161/
>

Well. I will make a close look in the work.

Thanks you very much


Best Regards,

Anqin

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] cgroup for disk quota
       [not found]     ` <499EB403.9050403-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
@ 2009-02-20 16:30       ` anqin
       [not found]         ` <d95d44a20902200830l1645e8b6j963f4ca62b5452f0-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: anqin @ 2009-02-20 16:30 UTC (permalink / raw)
  To: Daniel Lezcano; +Cc: containers-qjLDD68F18O7TbgM5vRIOg

>> BTW, I don't know how to submit a "useful" patch to kernel community. Or,
>> maybe the patch is not useful at all and maybe has been developed by other
>> developers. I very appreciate if both of experts could give me some
>> commend.
>> I will continue to develop cgroup-related codes to make contribution to
>> kernel
>> development.
>
> I am not sure I understand what you mean by "useful", but it seems you did
> an interesting feature. All the documentation related to submitting patches
> is in Documentation/SubmittingPatches, I hope that helps.
>

I learn from some kernel developers that they said only useful patches will be
accepted by community and applied on next kernel mainstream version.
So, I don't know whether or not my patch can be seen as "useful" by
most of users while it is developed under my project requirements.


> send your patches to lkml@ prefixed with [RFC] in the subject ('quilt' is
> your friend). If the functionality already exists, someone will tell you.
>

mmh...  SubmittingPatches said patches should be sent to
linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org if there is not "MAINTAINERS file",
after fixed into standard style.  what are different with that
you have said? The " lkml@ prefixed " seems not be mentioned
in SubmittingPatches, what should I do?

Thank you very much,

Anqin

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] cgroup for disk quota
       [not found]         ` <d95d44a20902200830l1645e8b6j963f4ca62b5452f0-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2009-02-20 16:44           ` Daniel Lezcano
  0 siblings, 0 replies; 9+ messages in thread
From: Daniel Lezcano @ 2009-02-20 16:44 UTC (permalink / raw)
  To: anqin; +Cc: containers-qjLDD68F18O7TbgM5vRIOg

anqin wrote:
>>> BTW, I don't know how to submit a "useful" patch to kernel community. Or,
>>> maybe the patch is not useful at all and maybe has been developed by other
>>> developers. I very appreciate if both of experts could give me some
>>> commend.
>>> I will continue to develop cgroup-related codes to make contribution to
>>> kernel
>>> development.
>> I am not sure I understand what you mean by "useful", but it seems you did
>> an interesting feature. All the documentation related to submitting patches
>> is in Documentation/SubmittingPatches, I hope that helps.
>>
> 
> I learn from some kernel developers that they said only useful patches will be
> accepted by community and applied on next kernel mainstream version.
> So, I don't know whether or not my patch can be seen as "useful" by
> most of users while it is developed under my project requirements.
> 
> 
>> send your patches to lkml@ prefixed with [RFC] in the subject ('quilt' is
>> your friend). If the functionality already exists, someone will tell you.
>>
> 
> mmh...  SubmittingPatches said patches should be sent to
> linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org if there is not "MAINTAINERS file",
> after fixed into standard style.  what are different with that
> you have said? The " lkml@ prefixed " seems not be mentioned
> in SubmittingPatches, what should I do?

lkml@ = linux kernel mailing list aka linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org

I think you will find the answers of all your questions here:
	http://kernelnewbies.org/UpstreamMerge

If you are looking for the recipients of your patches, IMO you should send:
	To: Paul Menage
	Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
	Cc: containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org

cf MAINTENERS file:

CONTROL GROUPS (CGROUPS)
P:      Paul Menage
M:      menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org
L:      containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
S:      Maintained

Regards
   -- Daniel

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2009-02-20 16:44 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-02-20  8:28 [PATCH] cgroup for disk quota anqin
     [not found] ` <d95d44a20902200028h1e229cc0pa3cdd4f42814e78e-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2009-02-20 10:21   ` Fwd: " anqin
     [not found]     ` <d95d44a20902200221w67ee1b49ua6027f3090186af9-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2009-02-20 10:32       ` Rolando Martins
     [not found]         ` <b6a2d2e20902200232x33204c61j30837a697d69c4d4-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2009-02-20 11:00           ` anqin
     [not found]             ` <d95d44a20902200300x43ef2ccfh6a66d3b81056d7fd-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2009-02-20 13:51               ` Daniel Lezcano
     [not found]                 ` <499EB56B.6030502-GANU6spQydw@public.gmane.org>
2009-02-20 16:14                   ` anqin
2009-02-20 13:45   ` Daniel Lezcano
     [not found]     ` <499EB403.9050403-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
2009-02-20 16:30       ` anqin
     [not found]         ` <d95d44a20902200830l1645e8b6j963f4ca62b5452f0-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2009-02-20 16:44           ` Daniel Lezcano

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.