linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC] quota: 64-bit limits with vfs
@ 2008-03-06 13:41 Andrew Perepechko
  2008-03-06 14:48 ` Jan Kara
  0 siblings, 1 reply; 21+ messages in thread
From: Andrew Perepechko @ 2008-03-06 13:41 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Johann Lombardi, Zhiyong Landen tian, Alex Lyashkov

Hello!

We need large (above 4 TB) block quota limits, but it seems that the XFS filesystem
(which has its own quota implementation) is the only available filesystem that supports them.
Currently ext3 supports up to 8 TB of data, and the forthcoming ext4 will support even more.

Linux kernel has two implementations of quota format modules:
quota_v1 (with QFMT_VFS_OLD id)
quota_v2 (with QFMT_VFS_V0 id)
Both use 32-bit data types to store quota limits on disk
(see struct v1_disk_dqblk and struct v2_disk_dqblk). Block quota limits
are stored in 1 KB units (the QUOTABLOCK_SIZE constant), which gives
a largest possible quota limit of (2^32-1)*2^10 bytes ~ 4 TB.

The in-memory representation of quota entries suffers from the same 4 TB
limitation (see struct mem_dqblk).

The patch below adds a separate quota_v3 module which uses 64-bit data types to solve the problem.
(Another possible approach is to merge the code into the quota_v2 module in order to share some code, but
little could be shared because there are too many references to the disk_dqblk structures and dependent constants.)

Could you comment on the patch and the idea behind it in general?

Thank you.
Andrew.

---

 fs/Kconfig                 |    7
 fs/Makefile                |    1
 fs/ext3/super.c            |   12
 fs/quota_v1.c              |   50 ++-
 fs/quota_v2.c              |   45 +-
 fs/quota_v3.c              |  739 +++++++++++++++++++++++++++++++++++++++++++++
 fs/reiserfs/super.c        |    2
 include/linux/dqblk_v3.h   |   26 +
 include/linux/quota.h      |   23 -
 include/linux/quotaio_v3.h |   81 ++++
 10 files changed, 952 insertions(+), 34 deletions(-)

---

diff -rNpu quota.orig/fs/Kconfig quota/fs/Kconfig
--- quota.orig/fs/Kconfig	2008-01-24 14:33:56.000000000 +0300
+++ quota/fs/Kconfig	2008-02-27 16:49:56.108413855 +0300
@@ -488,6 +488,13 @@ config QFMT_V2
 	  This quota format allows using quotas with 32-bit UIDs/GIDs. If you
 	  need this functionality say Y here.
 
+config QFMT_V3
+	tristate "Quota format v3 support"
+	depends on QUOTA
+	help
+	  This quota format allows using quotas with 32-bit UIDs/GIDs and 64-bit
+	  limits. If you need this functionality say Y here.
+
 config QUOTACTL
 	bool
 	depends on XFS_QUOTA || QUOTA
diff -rNpu quota.orig/fs/Makefile quota/fs/Makefile
--- quota.orig/fs/Makefile	2008-01-24 14:33:54.000000000 +0300
+++ quota/fs/Makefile	2008-02-27 16:50:13.436477156 +0300
@@ -40,6 +40,7 @@ obj-$(CONFIG_GENERIC_ACL)	+= generic_acl
 obj-$(CONFIG_QUOTA)		+= dquot.o
 obj-$(CONFIG_QFMT_V1)		+= quota_v1.o
 obj-$(CONFIG_QFMT_V2)		+= quota_v2.o
+obj-$(CONFIG_QFMT_V3)		+= quota_v3.o
 obj-$(CONFIG_QUOTACTL)		+= quota.o
 
 obj-$(CONFIG_DMAPI)		+= dmapi/
diff -rNpu quota.orig/fs/quota_v1.c quota/fs/quota_v1.c
--- quota.orig/fs/quota_v1.c	2006-03-20 08:53:29.000000000 +0300
+++ quota/fs/quota_v1.c	2008-02-29 19:15:23.325159161 +0300
@@ -25,8 +25,16 @@ static void v1_disk2mem_dqblk(struct mem
 	m->dqb_btime = d->dqb_btime;
 }
 
-static void v1_mem2disk_dqblk(struct v1_disk_dqblk *d, struct mem_dqblk *m)
+static int v1_mem2disk_dqblk(struct v1_disk_dqblk *d, struct mem_dqblk *m)
 {
+	__u32 typelimit = ~((__u32)0);
+
+	if (m->dqb_ihardlimit > typelimit ||
+	    m->dqb_isoftlimit > typelimit ||
+	    m->dqb_bhardlimit > typelimit ||
+	    m->dqb_bsoftlimit > typelimit)
+		return -EINVAL;
+
 	d->dqb_ihardlimit = m->dqb_ihardlimit;
 	d->dqb_isoftlimit = m->dqb_isoftlimit;
 	d->dqb_curinodes = m->dqb_curinodes;
@@ -35,6 +43,8 @@ static void v1_mem2disk_dqblk(struct v1_
 	d->dqb_curblocks = toqb(m->dqb_curspace);
 	d->dqb_itime = m->dqb_itime;
 	d->dqb_btime = m->dqb_btime;
+
+	return 0;
 }
 
 static int v1_read_dqblk(struct dquot *dquot)
@@ -64,7 +74,10 @@ static int v1_commit_dqblk(struct dquot
 	ssize_t ret;
 	struct v1_disk_dqblk dqblk;
 
-	v1_mem2disk_dqblk(&dqblk, &dquot->dq_dqb);
+	ret = v1_mem2disk_dqblk(&dqblk, &dquot->dq_dqb);
+	if (ret < 0)
+		return ret;
+
 	if (dquot->dq_id == 0) {
 		dqblk.dqb_btime = sb_dqopt(dquot->dq_sb)->info[type].dqi_bgrace;
 		dqblk.dqb_itime = sb_dqopt(dquot->dq_sb)->info[type].dqi_igrace;
@@ -88,7 +101,7 @@ out:
 	return ret;
 }
 
-/* Magics of new quota format */
+/* Magics of vfsv0 quota format */
 #define V2_INITQMAGICS {\
 	0xd9c01f11,     /* USRQUOTA */\
 	0xd9c01927      /* GRPQUOTA */\
@@ -100,15 +113,29 @@ struct v2_disk_dqheader {
 	__le32 dqh_version;      /* File version */
 };
 
+/* Magics of vfsv1 quota format */
+#define V3_INITQMAGICS {\
+	0xd9c01f11,     /* USRQUOTA */\
+	0xd9c01927      /* GRPQUOTA */\
+}
+
+/* Header of new quota format */
+struct v3_disk_dqheader {
+	__le32 dqh_magic;        /* Magic number identifying file */
+	__le32 dqh_version;      /* File version */
+};
+
 static int v1_check_quota_file(struct super_block *sb, int type)
 {
 	struct inode *inode = sb_dqopt(sb)->files[type];
 	ulong blocks;
 	size_t off; 
-	struct v2_disk_dqheader dqhead;
-	ssize_t size;
+	struct v2_disk_dqheader dqhead_v2;
+	struct v3_disk_dqheader dqhead_v3;
+	ssize_t size_v2, size_v3;
 	loff_t isize;
-	static const uint quota_magics[] = V2_INITQMAGICS;
+	static const uint quota_magics_v2[] = V2_INITQMAGICS,
+			  quota_magics_v3[] = V3_INITQMAGICS;
 
 	isize = i_size_read(inode);
 	if (!isize)
@@ -118,10 +145,15 @@ static int v1_check_quota_file(struct su
 	if ((blocks % sizeof(struct v1_disk_dqblk) * BLOCK_SIZE + off) % sizeof(struct v1_disk_dqblk))
 		return 0;
 	/* Doublecheck whether we didn't get file with new format - with old quotactl() this could happen */
-	size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
-	if (size != sizeof(struct v2_disk_dqheader))
+	size_v2 = sb->s_op->quota_read(sb, type, (char *)&dqhead_v2,
+				       sizeof(struct v2_disk_dqheader), 0);
+	size_v3 = sb->s_op->quota_read(sb, type, (char *)&dqhead_v3,
+				       sizeof(struct v3_disk_dqheader), 0);
+	if (size_v2 != sizeof(struct v2_disk_dqheader) &&
+	    size_v3 != sizeof(struct v3_disk_dqheader))
 		return 1;	/* Probably not new format */
-	if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type])
+	if (le32_to_cpu(dqhead_v2.dqh_magic) != quota_magics_v2[type] &&
+	    le32_to_cpu(dqhead_v3.dqh_magic) != quota_magics_v3[type])
 		return 1;	/* Definitely not new format */
 	printk(KERN_INFO "VFS: %s: Refusing to turn on old quota format on given file. It probably contains newer quota format.\n", sb->s_id);
         return 0;		/* Seems like a new format file -> refuse it */
diff -rNpu quota.orig/fs/quota_v2.c quota/fs/quota_v2.c
--- quota.orig/fs/quota_v2.c	2006-03-20 08:53:29.000000000 +0300
+++ quota/fs/quota_v2.c	2008-02-28 10:37:31.067602129 +0300
@@ -106,17 +106,27 @@ static void disk2memdqb(struct mem_dqblk
 	m->dqb_btime = le64_to_cpu(d->dqb_btime);
 }
 
-static void mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id)
+static int mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id)
 {
-	d->dqb_ihardlimit = cpu_to_le32(m->dqb_ihardlimit);
-	d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit);
-	d->dqb_curinodes = cpu_to_le32(m->dqb_curinodes);
+	__u32 typelimit = ~((__u32)0);
+
+	if (m->dqb_ihardlimit > typelimit ||
+	    m->dqb_isoftlimit > typelimit ||
+	    m->dqb_bhardlimit > typelimit ||
+	    m->dqb_bsoftlimit > typelimit)
+		return -EINVAL;
+
+	d->dqb_ihardlimit = cpu_to_le32((__u32)m->dqb_ihardlimit);
+	d->dqb_isoftlimit = cpu_to_le32((__u32)m->dqb_isoftlimit);
+	d->dqb_curinodes = cpu_to_le32((__u32)m->dqb_curinodes);
 	d->dqb_itime = cpu_to_le64(m->dqb_itime);
-	d->dqb_bhardlimit = cpu_to_le32(m->dqb_bhardlimit);
-	d->dqb_bsoftlimit = cpu_to_le32(m->dqb_bsoftlimit);
+	d->dqb_bhardlimit = cpu_to_le32((__u32)m->dqb_bhardlimit);
+	d->dqb_bsoftlimit = cpu_to_le32((__u32)m->dqb_bsoftlimit);
 	d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
 	d->dqb_btime = cpu_to_le64(m->dqb_btime);
 	d->dqb_id = cpu_to_le32(id);
+
+	return 0;
 }
 
 static dqbuf_t getdqbuf(void)
@@ -394,14 +404,12 @@ static int v2_write_dquot(struct dquot *
 	ssize_t ret;
 	struct v2_disk_dqblk ddquot, empty;
 
-	/* dq_off is guarded by dqio_sem */
-	if (!dquot->dq_off)
-		if ((ret = dq_insert_tree(dquot)) < 0) {
-			printk(KERN_ERR "VFS: Error %zd occurred while creating quota.\n", ret);
-			return ret;
-		}
 	spin_lock(&dq_data_lock);
-	mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id);
+	ret = mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id);
+	if (ret < 0) {
+		spin_unlock(&dq_data_lock);
+		return ret;
+	}
 	/* Argh... We may need to write structure full of zeroes but that would be
 	 * treated as an empty place by the rest of the code. Format change would
 	 * be definitely cleaner but the problems probably are not worth it */
@@ -409,6 +417,17 @@ static int v2_write_dquot(struct dquot *
 	if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
 		ddquot.dqb_itime = cpu_to_le64(1);
 	spin_unlock(&dq_data_lock);
+
+	/* dq_off is guarded by dqio_sem */
+	if (!dquot->dq_off) {
+		ret = dq_insert_tree(dquot);
+		if (ret < 0) {
+			printk(KERN_ERR "VFS: Error %zd occurred "
+			       "while creating quota.\n", ret);
+			return ret;
+		}
+	}
+
 	ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type,
 	      (char *)&ddquot, sizeof(struct v2_disk_dqblk), dquot->dq_off);
 	if (ret != sizeof(struct v2_disk_dqblk)) {
diff -rNpu quota.orig/fs/quota_v3.c quota/fs/quota_v3.c
--- quota.orig/fs/quota_v3.c	1970-01-01 03:00:00.000000000 +0300
+++ quota/fs/quota_v3.c	2008-02-28 10:55:05.981528558 +0300
@@ -0,0 +1,739 @@
+/*
+ *	vfsv1 quota IO operations on file
+ *
+ *	adds support for quota limits above 4 TB
+ *
+ *	based on quota_v2.c by Jan Kara
+ */
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/dqblk_v3.h>
+#include <linux/quotaio_v3.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <asm/byteorder.h>
+
+MODULE_DESCRIPTION("Quota format v3 support");
+MODULE_LICENSE("GPL");
+
+/* Enables the extra consistency checks guarded by #ifdef in this file */
+#define __QUOTA_V3_PARANOIA
+
+/* Buffer holding one quota file block (V3_DQBLKSIZE bytes, see getdqbuf()) */
+typedef char *dqbuf_t;
+
+/* Byte of the id that indexes the tree block at the given depth (0 = root) */
+#define GETIDINDEX(id, depth) (((id) >> ((V3_DQTREEDEPTH-(depth)-1)*8)) & 0xff)
+/* First dquot entry of a data block, i.e. the address just past its header */
+#define GETENTRIES(buf) ((struct v3_disk_dqblk *) \
+			  (((char *)(buf))+sizeof(struct v3_disk_dqdbheader)))
+
+/*
+ * Check whether given file is really vfsv1 quotafile.
+ *
+ * Reads the on-disk header at offset 0 and compares its magic and version
+ * with the values expected for this quota type.
+ * Returns 1 if both match, 0 on a short read or a mismatch.
+ */
+static int v3_check_quota_file(struct super_block *sb, int type)
+{
+	struct v3_disk_dqheader dqhead;
+	ssize_t size;
+	static const uint quota_magics[] = V3_INITQMAGICS;
+	static const uint quota_versions[] = V3_INITQVERSIONS;
+
+	size = sb->s_op->quota_read(sb, type, (char *)&dqhead,
+				    sizeof(struct v3_disk_dqheader), 0);
+	if (size != sizeof(struct v3_disk_dqheader)) {
+		/* sizeof yields size_t (%zu); size is ssize_t (%zd) */
+		printk(KERN_WARNING "quota_v3: failed read expected=%zu, "
+		       "got=%zd\n", sizeof(struct v3_disk_dqheader), size);
+		return 0;
+	}
+	if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
+	    le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
+		return 0;
+	return 1;
+}
+
+/*
+ * Read information header from quota file.
+ *
+ * Reads a struct v3_disk_dqinfo from offset V3_DQINFOOFF and copies its
+ * fields, converted from little-endian, into the in-memory info for this
+ * quota type. Returns 0 on success, or -1 (after logging a warning) when
+ * fewer bytes than the full structure were read.
+ */
+static int v3_read_file_info(struct super_block *sb, int type)
+{
+	struct v3_disk_dqinfo dinfo;
+	struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
+	ssize_t size;
+
+	size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
+	       sizeof(struct v3_disk_dqinfo), V3_DQINFOOFF);
+	if (size != sizeof(struct v3_disk_dqinfo)) {
+		printk(KERN_WARNING "Can't read info structure on device %s.\n",
+			sb->s_id);
+		return -1;
+	}
+	info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
+	info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
+	info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
+	info->u.v3_i.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
+	info->u.v3_i.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
+	info->u.v3_i.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
+	return 0;
+}
+
+/*
+ * Write information header to quota file.
+ *
+ * Clears DQF_INFO_DIRTY and snapshots the grace times and flags while
+ * holding dq_data_lock, then fills in the block bookkeeping fields and
+ * writes the little-endian structure at V3_DQINFOOFF. Returns 0 on success,
+ * or -1 (after logging a warning) when the write was short.
+ */
+static int v3_write_file_info(struct super_block *sb, int type)
+{
+	struct v3_disk_dqinfo dinfo;
+	struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
+	ssize_t size;
+
+	spin_lock(&dq_data_lock);
+	info->dqi_flags &= ~DQF_INFO_DIRTY;
+	dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace);
+	dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
+	dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
+	spin_unlock(&dq_data_lock);
+	dinfo.dqi_blocks = cpu_to_le32(info->u.v3_i.dqi_blocks);
+	dinfo.dqi_free_blk = cpu_to_le32(info->u.v3_i.dqi_free_blk);
+	dinfo.dqi_free_entry = cpu_to_le32(info->u.v3_i.dqi_free_entry);
+	size = sb->s_op->quota_write(sb, type, (char *)&dinfo,
+	       sizeof(struct v3_disk_dqinfo), V3_DQINFOOFF);
+	if (size != sizeof(struct v3_disk_dqinfo)) {
+		printk(KERN_WARNING "Can't write info structure "
+			"on device %s.\n", sb->s_id);
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Convert an on-disk (little-endian) dquot entry into its in-memory form.
+ * The entry id is not copied.
+ */
+static void disk2memdqb(struct mem_dqblk *m, struct v3_disk_dqblk *d)
+{
+	/* Block (space) accounting */
+	m->dqb_bhardlimit = le64_to_cpu(d->dqb_bhardlimit);
+	m->dqb_bsoftlimit = le64_to_cpu(d->dqb_bsoftlimit);
+	m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
+	m->dqb_btime = le64_to_cpu(d->dqb_btime);
+
+	/* Inode accounting */
+	m->dqb_ihardlimit = le64_to_cpu(d->dqb_ihardlimit);
+	m->dqb_isoftlimit = le64_to_cpu(d->dqb_isoftlimit);
+	m->dqb_curinodes = le64_to_cpu(d->dqb_curinodes);
+	m->dqb_itime = le64_to_cpu(d->dqb_itime);
+}
+
+/*
+ * Convert an in-memory dquot into its on-disk (little-endian) entry and
+ * stamp the entry with the given id.
+ */
+static void mem2diskdqb(struct v3_disk_dqblk *d, struct mem_dqblk *m, qid_t id)
+{
+	d->dqb_id = cpu_to_le32(id);
+
+	/* Block (space) accounting */
+	d->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit);
+	d->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit);
+	d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
+	d->dqb_btime = cpu_to_le64(m->dqb_btime);
+
+	/* Inode accounting */
+	d->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
+	d->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
+	d->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
+	d->dqb_itime = cpu_to_le64(m->dqb_itime);
+}
+
+/*
+ * Allocate a buffer for one quota file block (V3_DQBLKSIZE bytes).
+ * Returns NULL, after logging a warning, when the allocation fails.
+ */
+static dqbuf_t getdqbuf(void)
+{
+	dqbuf_t blockbuf;
+
+	blockbuf = kmalloc(V3_DQBLKSIZE, GFP_NOFS);
+	if (blockbuf == NULL)
+		printk(KERN_WARNING "Not enough memory for quota buffers.\n");
+
+	return blockbuf;
+}
+
+/* Release a buffer obtained from getdqbuf() */
+static inline void freedqbuf(dqbuf_t buf)
+{
+	kfree(buf);
+}
+
+/*
+ * Read quota file block number blk into buf.
+ *
+ * The buffer is zeroed first, so bytes beyond a short read stay zero.
+ * Returns the result of the filesystem's quota_read operation.
+ */
+static inline ssize_t read_blk(struct super_block *sb, int type,
+				uint blk, dqbuf_t buf)
+{
+	memset(buf, 0, V3_DQBLKSIZE);
+	/* Widen before shifting so the byte offset is not truncated to 32 bits */
+	return sb->s_op->quota_read(sb, type, (char *)buf,
+	       V3_DQBLKSIZE, (loff_t)blk << V3_DQBLKSIZE_BITS);
+}
+
+/*
+ * Write buf to quota file block number blk.
+ * Returns the result of the filesystem's quota_write operation.
+ */
+static inline ssize_t write_blk(struct super_block *sb, int type,
+				 uint blk, dqbuf_t buf)
+{
+	/* Widen before shifting so the byte offset is not truncated to 32 bits */
+	return sb->s_op->quota_write(sb, type, (char *)buf,
+	       V3_DQBLKSIZE, (loff_t)blk << V3_DQBLKSIZE_BITS);
+}
+
+/*
+ * Remove empty block from list and return it.
+ *
+ * If the free block list (dqi_free_blk) is non-empty, its head is unlinked;
+ * otherwise a zeroed block is written at index dqi_blocks to extend the file
+ * and dqi_blocks is incremented. The info is marked dirty on success.
+ * Returns the block number, -ENOMEM if no buffer could be allocated, or the
+ * negative result of the failed block read/write.
+ */
+static int get_free_dqblk(struct super_block *sb, int type)
+{
+	dqbuf_t buf = getdqbuf();
+	struct mem_dqinfo *info = sb_dqinfo(sb, type);
+	struct v3_disk_dqdbheader *dh = (struct v3_disk_dqdbheader *)buf;
+	int ret, blk;
+
+	if (!buf)
+		return -ENOMEM;
+	if (info->u.v3_i.dqi_free_blk) {
+		blk = info->u.v3_i.dqi_free_blk;
+		ret = read_blk(sb, type, blk, buf);
+		if (ret < 0)
+			goto out_buf;
+		info->u.v3_i.dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
+	} else {
+		memset(buf, 0, V3_DQBLKSIZE);
+		/* Assure block allocation... */
+		ret = write_blk(sb, type, info->u.v3_i.dqi_blocks, buf);
+		if (ret < 0)
+			goto out_buf;
+		blk = info->u.v3_i.dqi_blocks++;
+	}
+	mark_info_dirty(sb, type);
+	ret = blk;
+out_buf:
+	freedqbuf(buf);
+	return ret;
+}
+
+/*
+ * Insert empty block to the list.
+ *
+ * Rewrites the header in buf so that block blk becomes the new head of the
+ * free block list (no entries, next = old head), marks the info dirty and
+ * writes the block out. Returns 0 on success or the negative result of the
+ * failed write; dqi_free_blk has already been updated in that case.
+ */
+static int put_free_dqblk(struct super_block *sb, int type,
+			   dqbuf_t buf, uint blk)
+{
+	struct mem_dqinfo *info = sb_dqinfo(sb, type);
+	struct v3_disk_dqdbheader *dh = (struct v3_disk_dqdbheader *)buf;
+	int err;
+
+	dh->dqdh_next_free = cpu_to_le32(info->u.v3_i.dqi_free_blk);
+	dh->dqdh_prev_free = cpu_to_le32(0);
+	dh->dqdh_entries = cpu_to_le16(0);
+	info->u.v3_i.dqi_free_blk = blk;
+	mark_info_dirty(sb, type);
+	/* Some strange block. We had better leave it... */
+	err = write_blk(sb, type, blk, buf);
+	if (err < 0)
+		return err;
+	return 0;
+}
+
+/*
+ * Remove given block from the list of blocks with free entries.
+ *
+ * Unlinks blk from the doubly linked list by patching dqdh_prev_free of its
+ * successor and dqdh_next_free of its predecessor; when there is no
+ * predecessor the list head (dqi_free_entry) is advanced instead. The links
+ * in buf are then cleared and the block is written back; a failure of that
+ * final write is only logged. Returns 0 on success, -ENOMEM if no temporary
+ * buffer could be allocated, or the negative result of a failed read/write
+ * of a neighbouring block.
+ */
+static int remove_free_dqentry(struct super_block *sb,
+				int type, dqbuf_t buf, uint blk)
+{
+	dqbuf_t tmpbuf = getdqbuf();
+	struct mem_dqinfo *info = sb_dqinfo(sb, type);
+	struct v3_disk_dqdbheader *dh = (struct v3_disk_dqdbheader *)buf;
+	uint nextblk = le32_to_cpu(dh->dqdh_next_free),
+	     prevblk = le32_to_cpu(dh->dqdh_prev_free);
+	int err;
+
+	if (!tmpbuf)
+		return -ENOMEM;
+	if (nextblk) {
+		err = read_blk(sb, type, nextblk, tmpbuf);
+		if (err < 0)
+			goto out_buf;
+		((struct v3_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = dh->dqdh_prev_free;
+		err = write_blk(sb, type, nextblk, tmpbuf);
+		if (err < 0)
+			goto out_buf;
+	}
+	if (prevblk) {
+		err = read_blk(sb, type, prevblk, tmpbuf);
+		if (err < 0)
+			goto out_buf;
+		((struct v3_disk_dqdbheader *)tmpbuf)->dqdh_next_free = dh->dqdh_next_free;
+		err = write_blk(sb, type, prevblk, tmpbuf);
+		if (err < 0)
+			goto out_buf;
+	} else {
+		info->u.v3_i.dqi_free_entry = nextblk;
+		mark_info_dirty(sb, type);
+	}
+	freedqbuf(tmpbuf);
+	dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
+	/* No matter whether write succeeds block is out of list */
+	if (write_blk(sb, type, blk, buf) < 0)
+		printk(KERN_ERR "VFS: Can't write block (%u) "
+			"with free entries.\n", blk);
+	return 0;
+out_buf:
+	freedqbuf(tmpbuf);
+	return err;
+}
+
+/*
+ * Insert given block to the beginning of list with free entries.
+ *
+ * Writes blk with next = current head and prev = 0, points the old head's
+ * dqdh_prev_free (if there is an old head) back at blk, and finally makes
+ * blk the list head (dqi_free_entry) and marks the info dirty. Returns 0 on
+ * success, -ENOMEM if no temporary buffer could be allocated, or the
+ * negative result of a failed read/write.
+ */
+static int insert_free_dqentry(struct super_block *sb,
+				int type, dqbuf_t buf, uint blk)
+{
+	dqbuf_t tmpbuf = getdqbuf();
+	struct mem_dqinfo *info = sb_dqinfo(sb, type);
+	struct v3_disk_dqdbheader *dh = (struct v3_disk_dqdbheader *)buf;
+	int err;
+
+	if (!tmpbuf)
+		return -ENOMEM;
+	dh->dqdh_next_free = cpu_to_le32(info->u.v3_i.dqi_free_entry);
+	dh->dqdh_prev_free = cpu_to_le32(0);
+	err = write_blk(sb, type, blk, buf);
+	if (err < 0)
+		goto out_buf;
+	if (info->u.v3_i.dqi_free_entry) {
+		err = read_blk(sb, type, info->u.v3_i.dqi_free_entry, tmpbuf);
+		if (err < 0)
+			goto out_buf;
+		((struct v3_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = cpu_to_le32(blk);
+		err = write_blk(sb, type, info->u.v3_i.dqi_free_entry, tmpbuf);
+		if (err < 0)
+			goto out_buf;
+	}
+	freedqbuf(tmpbuf);
+	info->u.v3_i.dqi_free_entry = blk;
+	mark_info_dirty(sb, type);
+	return 0;
+out_buf:
+	freedqbuf(tmpbuf);
+	return err;
+}
+
+/*
+ * Find space for dquot.
+ *
+ * Takes the first block on the list of blocks with free entries, or gets a
+ * fresh block if that list is empty, claims the first all-zero entry in it
+ * and stores that entry's file offset in dquot->dq_off. A block that is
+ * about to become full is removed from the free-entry list.
+ * Returns the data block number with *err non-negative on success; returns
+ * 0 with *err negative on failure.
+ */
+static uint find_free_dqentry(struct dquot *dquot, int *err)
+{
+	struct super_block *sb = dquot->dq_sb;
+	struct mem_dqinfo *info = sb_dqopt(sb)->info+dquot->dq_type;
+	uint blk, i;
+	struct v3_disk_dqdbheader *dh;
+	struct v3_disk_dqblk *ddquot;
+	struct v3_disk_dqblk fakedquot;
+	dqbuf_t buf;
+
+	*err = 0;
+	buf = getdqbuf();
+	if (!buf) {
+		*err = -ENOMEM;
+		return 0;
+	}
+	dh = (struct v3_disk_dqdbheader *)buf;
+	ddquot = GETENTRIES(buf);
+	if (info->u.v3_i.dqi_free_entry) {
+		blk = info->u.v3_i.dqi_free_entry;
+		*err = read_blk(sb, dquot->dq_type, blk, buf);
+		if (*err < 0)
+			goto out_buf;
+	} else {
+		blk = get_free_dqblk(sb, dquot->dq_type);
+		if ((int)blk < 0) {
+			*err = blk;
+			freedqbuf(buf);
+			return 0;
+		}
+		memset(buf, 0, V3_DQBLKSIZE);
+		/* This is enough as block is already zeroed */
+		/* and entry list is empty...                */
+		info->u.v3_i.dqi_free_entry = blk;
+		mark_info_dirty(sb, dquot->dq_type);
+	}
+	/* Block will be full after this insertion: take it off the list */
+	if (le16_to_cpu(dh->dqdh_entries)+1 >= V3_DQSTRINBLK) {
+		*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk);
+		if (*err < 0) {
+			printk(KERN_ERR "VFS: find_free_dqentry(): Can't "
+			       "remove block (%u) from entry free list.\n",
+			       blk);
+			goto out_buf;
+		}
+	}
+	dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)+1);
+	memset(&fakedquot, 0, sizeof(struct v3_disk_dqblk));
+	/* Find free structure in block */
+	for (i = 0; i < V3_DQSTRINBLK && memcmp(&fakedquot, ddquot+i,
+		    sizeof(struct v3_disk_dqblk)); i++);
+#ifdef __QUOTA_V3_PARANOIA
+	if (i == V3_DQSTRINBLK) {
+		printk(KERN_ERR "VFS: find_free_dqentry(): "
+		       "Data block full but it shouldn't.\n");
+		*err = -EIO;
+		goto out_buf;
+	}
+#endif
+	*err = write_blk(sb, dquot->dq_type, blk, buf);
+	if (*err < 0) {
+		printk(KERN_ERR "VFS: find_free_dqentry(): "
+		       "Can't write quota data block %u.\n", blk);
+		goto out_buf;
+	}
+	/* Widen blk before shifting so the offset is not truncated to 32 bits */
+	dquot->dq_off = ((loff_t)blk << V3_DQBLKSIZE_BITS) +
+			sizeof(struct v3_disk_dqdbheader) +
+			i * sizeof(struct v3_disk_dqblk);
+	freedqbuf(buf);
+	return blk;
+out_buf:
+	freedqbuf(buf);
+	return 0;
+}
+
+/*
+ * Insert reference to structure into the trie.
+ *
+ * Handles the tree block *treeblk at the given depth: a block is allocated
+ * when *treeblk is 0, then the slot selected by GETIDINDEX() is followed,
+ * recursing until depth V3_DQTREEDEPTH-1 where find_free_dqentry() picks the
+ * data block. A slot that was empty is filled in and the block written on
+ * success; a block allocated here is returned to the free list on failure.
+ * Returns a negative error code on failure, non-negative otherwise.
+ */
+static int do_insert_tree(struct dquot *dquot, uint *treeblk, int depth)
+{
+	struct super_block *sb = dquot->dq_sb;
+	dqbuf_t buf;
+	int ret = 0, newson = 0, newact = 0;
+	__le32 *ref;
+	uint newblk;
+
+	buf = getdqbuf();
+	if (!buf)
+		return -ENOMEM;
+	if (!*treeblk) {
+		ret = get_free_dqblk(sb, dquot->dq_type);
+		if (ret < 0)
+			goto out_buf;
+		*treeblk = ret;
+		memset(buf, 0, V3_DQBLKSIZE);
+		newact = 1;
+	} else {
+		ret = read_blk(sb, dquot->dq_type, *treeblk, buf);
+		if (ret < 0) {
+			printk(KERN_ERR "VFS: Can't read tree quota "
+				"block %u.\n", *treeblk);
+			goto out_buf;
+		}
+	}
+	ref = (__le32 *)buf;
+	newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
+	if (!newblk)
+		newson = 1;
+	if (depth == V3_DQTREEDEPTH-1) {
+#ifdef __QUOTA_V3_PARANOIA
+		if (newblk) {
+			printk(KERN_ERR "VFS: Inserting already present "
+				"quota entry (block %u).\n",
+			le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]));
+			ret = -EIO;
+			goto out_buf;
+		}
+#endif
+		newblk = find_free_dqentry(dquot, &ret);
+	} else
+		ret = do_insert_tree(dquot, &newblk, depth+1);
+	if (newson && ret >= 0) {
+		ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk);
+		ret = write_blk(sb, dquot->dq_type, *treeblk, buf);
+	} else if (newact && ret < 0)
+		put_free_dqblk(sb, dquot->dq_type, buf, *treeblk);
+out_buf:
+	freedqbuf(buf);
+	return ret;
+}
+
+/*
+ * Wrapper for inserting quota structure into tree: starts the insertion at
+ * the root block (V3_DQTREEOFF), depth 0.
+ */
+static inline int dq_insert_tree(struct dquot *dquot)
+{
+	/* do_insert_tree() takes a uint *, so the block number must be uint */
+	uint tmp = V3_DQTREEOFF;
+
+	return do_insert_tree(dquot, &tmp, 0);
+}
+
+/*
+ *	Write the dquot to the quota file, inserting it into the tree first
+ *	when it has no on-disk location yet (dq_off == 0). Returns 0 on
+ *	success, the negative error from the insert or the write, or -ENOSPC
+ *	on a short write.
+ *
+ *	We don't have to be afraid of deadlocks
+ *	as we never have quotas on quota files...
+ */
+static int v3_write_dquot(struct dquot *dquot)
+{
+	int type = dquot->dq_type;
+	ssize_t ret;
+	struct v3_disk_dqblk ddquot, empty;
+
+	/* dq_off is guarded by dqio_sem */
+	if (!dquot->dq_off) {
+		ret = dq_insert_tree(dquot);
+		if (ret < 0) {
+			printk(KERN_ERR "VFS: Error %zd occurred "
+				"while creating quota.\n", ret);
+			return ret;
+		}
+	}
+	spin_lock(&dq_data_lock);
+	mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id);
+	/* Argh... We may need to write structure full of zeroes but that would
+	 * be treated as an empty place by the rest of the code. Format change
+	 * would be definitely cleaner but the problems are not worth it.
+	 * So an all-zero entry is stored with dqb_itime set to 1 instead;
+	 * v3_read_dquot() undoes this when reading the entry back. */
+	memset(&empty, 0, sizeof(struct v3_disk_dqblk));
+	if (!memcmp(&empty, &ddquot, sizeof(struct v3_disk_dqblk)))
+		ddquot.dqb_itime = cpu_to_le64(1);
+	spin_unlock(&dq_data_lock);
+	ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type,
+	      (char *)&ddquot, sizeof(struct v3_disk_dqblk), dquot->dq_off);
+	if (ret != sizeof(struct v3_disk_dqblk)) {
+		printk(KERN_WARNING "VFS: dquota write failed on dev %s\n",
+			dquot->dq_sb->s_id);
+		if (ret >= 0)
+			ret = -ENOSPC;
+	} else
+		ret = 0;
+	dqstats.writes++;
+
+	return ret;
+}
+
+/*
+ * Free dquot entry in data block.
+ *
+ * Decrements the entry count of data block blk. A block left with no
+ * entries is taken off the free-entry list and put on the free block list;
+ * otherwise the entry is zeroed and the block is written back, being added
+ * to the free-entry list when it was full before. On success dq_off is
+ * reset to 0.
+ * Returns a non-negative value on success, -ENOMEM if no buffer could be
+ * allocated, -EIO if dq_off does not point into blk, or the negative result
+ * of a failed block operation.
+ */
+static int free_dqentry(struct dquot *dquot, uint blk)
+{
+	struct super_block *sb = dquot->dq_sb;
+	int type = dquot->dq_type;
+	struct v3_disk_dqdbheader *dh;
+	dqbuf_t buf = getdqbuf();
+	int ret = 0;
+
+	if (!buf)
+		return -ENOMEM;
+	if (dquot->dq_off >> V3_DQBLKSIZE_BITS != blk) {
+		printk(KERN_ERR "VFS: Quota structure has offset to other "
+		  "block (%u) than it should (%u).\n", blk,
+		  (uint)(dquot->dq_off >> V3_DQBLKSIZE_BITS));
+		/* Report the inconsistency instead of returning success */
+		ret = -EIO;
+		goto out_buf;
+	}
+	ret = read_blk(sb, type, blk, buf);
+	if (ret < 0) {
+		printk(KERN_ERR "VFS: Can't read quota data block %u\n", blk);
+		goto out_buf;
+	}
+	dh = (struct v3_disk_dqdbheader *)buf;
+	dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)-1);
+	if (!le16_to_cpu(dh->dqdh_entries)) {
+		/* Block is now empty: move it to the free block list */
+		ret = remove_free_dqentry(sb, type, buf, blk);
+		if (ret < 0 ||
+		    (ret = put_free_dqblk(sb, type, buf, blk)) < 0) {
+			printk(KERN_ERR "VFS: Can't move quota data block (%u) "
+			  "to free list.\n", blk);
+			goto out_buf;
+		}
+	} else {
+		memset(buf+(dquot->dq_off & ((1 << V3_DQBLKSIZE_BITS)-1)), 0,
+		  sizeof(struct v3_disk_dqblk));
+		if (le16_to_cpu(dh->dqdh_entries) == V3_DQSTRINBLK-1) {
+			/* Insert will write block itself */
+			ret = insert_free_dqentry(sb, type, buf, blk);
+			if (ret < 0) {
+				printk(KERN_ERR "VFS: Can't insert quota data "
+				       "block (%u) to free entry list.\n", blk);
+				goto out_buf;
+			}
+		} else {
+			ret = write_blk(sb, type, blk, buf);
+			if (ret < 0) {
+				printk(KERN_ERR "VFS: Can't write quota data "
+				  "block %u\n", blk);
+				goto out_buf;
+			}
+		}
+	}
+	dquot->dq_off = 0;	/* Quota is now unattached */
+out_buf:
+	freedqbuf(buf);
+	return ret;
+}
+
+/*
+ * Remove reference to dquot from tree.
+ *
+ * Follows the slot selected by GETIDINDEX() down from tree block *blk; at
+ * depth V3_DQTREEDEPTH-1 the entry itself is freed with free_dqentry(). When
+ * the child was released the slot is cleared, and a tree block that is left
+ * entirely zero is put on the free block list and *blk set to 0 (the root
+ * block is always kept). Returns a negative error code on failure.
+ */
+static int remove_tree(struct dquot *dquot, uint *blk, int depth)
+{
+	struct super_block *sb = dquot->dq_sb;
+	int type = dquot->dq_type;
+	dqbuf_t buf = getdqbuf();
+	int ret = 0;
+	uint newblk;
+	__le32 *ref = (__le32 *)buf;
+
+	if (!buf)
+		return -ENOMEM;
+	ret = read_blk(sb, type, *blk, buf);
+	if (ret < 0) {
+		printk(KERN_ERR "VFS: Can't read quota data block %u\n", *blk);
+		goto out_buf;
+	}
+	newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
+	if (depth == V3_DQTREEDEPTH-1) {
+		ret = free_dqentry(dquot, newblk);
+		newblk = 0;
+	} else
+		ret = remove_tree(dquot, &newblk, depth+1);
+	if (ret >= 0 && !newblk) {
+		int i;
+		ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(0);
+		for (i = 0; i < V3_DQBLKSIZE && !buf[i]; i++);
+		/* Don't put the root block into the free block list */
+		if (i == V3_DQBLKSIZE && *blk != V3_DQTREEOFF) {
+			put_free_dqblk(sb, type, buf, *blk);
+			*blk = 0;
+		} else {
+			ret = write_blk(sb, type, *blk, buf);
+			if (ret < 0)
+				printk(KERN_ERR "VFS: Can't write quota tree "
+				  "block %u.\n", *blk);
+		}
+	}
+out_buf:
+	freedqbuf(buf);
+	return ret;
+}
+
+/*
+ * Delete dquot from tree, starting at the root block (V3_DQTREEOFF).
+ * A dquot with no on-disk location (dq_off == 0) needs no work.
+ */
+static int v3_delete_dquot(struct dquot *dquot)
+{
+	uint root = V3_DQTREEOFF;
+
+	/* Even not allocated? */
+	if (dquot->dq_off == 0)
+		return 0;
+
+	return remove_tree(dquot, &root, 0);
+}
+
+/*
+ * Find entry in block.
+ *
+ * Scans data block blk for the entry whose dqb_id matches dquot->dq_id.
+ * For id 0 an entry only matches when it is not entirely zero, because an
+ * all-zero entry denotes an unused slot.
+ * Returns the file offset of the entry, -ENOMEM if no buffer could be
+ * allocated, the negative result of a failed read, or -EIO when the block
+ * holds no entry for the id.
+ */
+static loff_t find_block_dqentry(struct dquot *dquot, uint blk)
+{
+	dqbuf_t buf = getdqbuf();
+	loff_t ret = 0;
+	int i;
+	struct v3_disk_dqblk *ddquot;
+
+	if (!buf)
+		return -ENOMEM;
+	/* Only derive the entry pointer once buf is known to be valid */
+	ddquot = GETENTRIES(buf);
+	ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf);
+	if (ret < 0) {
+		printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
+		goto out_buf;
+	}
+	if (dquot->dq_id)
+		for (i = 0; i < V3_DQSTRINBLK &&
+		     le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++);
+	else {	/* ID 0 as a bit more complicated searching... */
+		struct v3_disk_dqblk fakedquot;
+
+		memset(&fakedquot, 0, sizeof(struct v3_disk_dqblk));
+		for (i = 0; i < V3_DQSTRINBLK; i++)
+			if (!le32_to_cpu(ddquot[i].dqb_id) &&
+			    memcmp(&fakedquot, ddquot+i,
+				   sizeof(struct v3_disk_dqblk)))
+				break;
+	}
+	if (i == V3_DQSTRINBLK) {
+		printk(KERN_ERR "VFS: Quota for id %u referenced "
+		  "but not present.\n", dquot->dq_id);
+		ret = -EIO;
+		goto out_buf;
+	} else
+		/* Widen blk before shifting so the offset keeps all its bits */
+		ret = ((loff_t)blk << V3_DQBLKSIZE_BITS) + sizeof(struct
+		  v3_disk_dqdbheader) + i * sizeof(struct v3_disk_dqblk);
+out_buf:
+	freedqbuf(buf);
+	return ret;
+}
+
+/*
+ * Find entry for given id in the tree.
+ *
+ * Reads tree block blk and follows the slot selected by GETIDINDEX() for
+ * this depth, recursing through the tree levels and finishing with
+ * find_block_dqentry() on the data block. Returns the file offset of the
+ * entry, 0 when the slot holds no reference, or a negative error code.
+ */
+static loff_t find_tree_dqentry(struct dquot *dquot, uint blk, int depth)
+{
+	dqbuf_t buf = getdqbuf();
+	loff_t ret = 0;
+	__le32 *ref = (__le32 *)buf;
+
+	if (!buf)
+		return -ENOMEM;
+	ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf);
+	if (ret < 0) {
+		printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
+		goto out_buf;
+	}
+	ret = 0;
+	blk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
+	if (!blk)	/* No reference? */
+		goto out_buf;
+	if (depth < V3_DQTREEDEPTH-1)
+		ret = find_tree_dqentry(dquot, blk, depth+1);
+	else
+		ret = find_block_dqentry(dquot, blk);
+out_buf:
+	freedqbuf(buf);
+	return ret;
+}
+
+/*
+ * Find entry for given id in the tree - wrapper function.
+ * Starts the lookup at the root block (V3_DQTREEOFF), depth 0, and returns
+ * whatever find_tree_dqentry() returns.
+ */
+static inline loff_t find_dqentry(struct dquot *dquot)
+{
+	return find_tree_dqentry(dquot, V3_DQTREEOFF, 0);
+}
+
+/*
+ * Read the dquot for dquot->dq_id from the quota file.
+ *
+ * A dquot with no entry in the file, or whose lookup failed, is zeroed and
+ * flagged DQ_FAKE_B; so is a dquot read with all four limits equal to 0.
+ * Returns 0 on success (including "no entry") or a negative error code.
+ */
+static int v3_read_dquot(struct dquot *dquot)
+{
+	int type = dquot->dq_type;
+	struct v3_disk_dqblk ddquot, escaped;
+	loff_t offset;
+	int ret = 0;
+
+#ifdef __QUOTA_V3_PARANOIA
+	/* Invalidated quota? */
+	if (!dquot->dq_sb || !sb_dqopt(dquot->dq_sb)->files[type]) {
+		printk(KERN_ERR "VFS: Quota invalidated while reading!\n");
+		return -EIO;
+	}
+#endif
+	offset = find_dqentry(dquot);
+	if (offset <= 0) {
+		/* No entry (0) or lookup error (< 0): hand out an empty dquot */
+		if (offset < 0)
+			printk(KERN_ERR "VFS: Can't read quota "
+			  "structure for id %u.\n", dquot->dq_id);
+		dquot->dq_off = 0;
+		set_bit(DQ_FAKE_B, &dquot->dq_flags);
+		memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
+		ret = offset;
+		goto out;
+	}
+
+	dquot->dq_off = offset;
+	ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
+	      (char *)&ddquot, sizeof(struct v3_disk_dqblk), offset);
+	if (ret == sizeof(struct v3_disk_dqblk)) {
+		ret = 0;
+		/* We need to escape back all-zero structure */
+		memset(&escaped, 0, sizeof(struct v3_disk_dqblk));
+		escaped.dqb_itime = cpu_to_le64(1);
+		if (!memcmp(&escaped, &ddquot, sizeof(struct v3_disk_dqblk)))
+			ddquot.dqb_itime = 0;
+	} else {
+		if (ret >= 0)
+			ret = -EIO;
+		printk(KERN_ERR "VFS: Error while reading quota "
+		  "structure for id %u.\n", dquot->dq_id);
+		memset(&ddquot, 0, sizeof(struct v3_disk_dqblk));
+	}
+	disk2memdqb(&dquot->dq_dqb, &ddquot);
+	/* No limit of any kind set: mark the dquot as fake */
+	if (!(dquot->dq_dqb.dqb_bhardlimit ||
+	      dquot->dq_dqb.dqb_bsoftlimit ||
+	      dquot->dq_dqb.dqb_ihardlimit ||
+	      dquot->dq_dqb.dqb_isoftlimit))
+		set_bit(DQ_FAKE_B, &dquot->dq_flags);
+out:
+	dqstats.reads++;
+
+	return ret;
+}
+
+/*
+ * Check whether dquot should not be deleted. We know we are
+ * the only one operating on dquot (thanks to dq_lock).
+ * Only a dquot flagged DQ_FAKE_B with no inodes and no space charged is
+ * removed from the quota file; everything else is kept and 0 is returned.
+ */
+static int v3_release_dquot(struct dquot *dquot)
+{
+	/* Not flagged fake: keep the on-disk entry */
+	if (!test_bit(DQ_FAKE_B, &dquot->dq_flags))
+		return 0;
+	/* Still charged with inodes or space: keep the on-disk entry */
+	if (dquot->dq_dqb.dqb_curinodes | dquot->dq_dqb.dqb_curspace)
+		return 0;
+
+	return v3_delete_dquot(dquot);
+}
+
+static struct quota_format_ops v3_format_ops = {	/* vfsv1 format callbacks */
+	.check_quota_file	= v3_check_quota_file,
+	.read_file_info		= v3_read_file_info,
+	.write_file_info	= v3_write_file_info,
+	.free_file_info		= NULL,	/* no callback provided */
+	.read_dqblk		= v3_read_dquot,
+	.commit_dqblk		= v3_write_dquot,
+	.release_dqblk		= v3_release_dquot,
+};
+
+static struct quota_format_type v3_quota_format = {	/* passed to register_quota_format() */
+	.qf_fmt_id	= QFMT_VFS_V1,	/* format id defined in dqblk_v3.h */
+	.qf_ops		= &v3_format_ops,
+	.qf_owner	= THIS_MODULE
+};
+
+/* Module load: register the vfsv1 quota format */
+static int __init init_v3_quota_format(void)
+{
+	int err = register_quota_format(&v3_quota_format);
+
+	return err;
+}
+
+/* Module unload: unregister the vfsv1 quota format again */
+static void __exit exit_v3_quota_format(void)
+{
+	unregister_quota_format(&v3_quota_format);
+}
+
+module_init(init_v3_quota_format);
+module_exit(exit_v3_quota_format);
diff -rNpu quota.orig/include/linux/dqblk_v3.h quota/include/linux/dqblk_v3.h
--- quota.orig/include/linux/dqblk_v3.h	1970-01-01 03:00:00.000000000 +0300
+++ quota/include/linux/dqblk_v3.h	2008-02-27 16:31:45.964283988 +0300
@@ -0,0 +1,26 @@
+/*
+ *	Definitions of structures for vfsv1 quota format
+ */
+
+#ifndef _LINUX_DQBLK_V3_H
+#define _LINUX_DQBLK_V3_H
+
+#include <linux/types.h>
+
+/* id numbers of quota format (used as qf_fmt_id when registering quota_v3) */
+#define QFMT_VFS_V1 3
+
+/* Numbers of blocks needed for updates; quota.h folds these into the
+ * DQUOT_INIT_ and DQUOT_DEL_ ALLOC/REWRITE maxima over all formats */
+#define V3_INIT_ALLOC 4
+#define V3_INIT_REWRITE 2
+#define V3_DEL_ALLOC 0
+#define V3_DEL_REWRITE 6
+
+/* Inmemory copy of version specific information */
+struct v3_mem_dqinfo {
+	unsigned int dqi_blocks;	/* block number used for the next block appended to the file */
+	unsigned int dqi_free_blk;	/* head of the free block list, 0 if empty */
+	unsigned int dqi_free_entry;	/* head of the list of data blocks with free entries, 0 if empty */
+};
+
+#endif /* _LINUX_DQBLK_V3_H */
diff -rNpu quota.orig/include/linux/quota.h quota/include/linux/quota.h
--- quota.orig/include/linux/quota.h	2006-03-20 08:53:29.000000000 +0300
+++ quota/include/linux/quota.h	2008-02-27 16:30:15.306620367 +0300
@@ -136,24 +136,27 @@ struct if_dqinfo {
 #include <linux/dqblk_xfs.h>
 #include <linux/dqblk_v1.h>
 #include <linux/dqblk_v2.h>
+#include <linux/dqblk_v3.h>
 
 /* Maximal numbers of writes for quota operation (insert/delete/update)
  * (over VFS all formats) */
-#define DQUOT_INIT_ALLOC max(V1_INIT_ALLOC, V2_INIT_ALLOC)
-#define DQUOT_INIT_REWRITE max(V1_INIT_REWRITE, V2_INIT_REWRITE)
-#define DQUOT_DEL_ALLOC max(V1_DEL_ALLOC, V2_DEL_ALLOC)
-#define DQUOT_DEL_REWRITE max(V1_DEL_REWRITE, V2_DEL_REWRITE)
+#define DQUOT_INIT_ALLOC max(V1_INIT_ALLOC, max(V2_INIT_ALLOC, V3_INIT_ALLOC))
+#define DQUOT_INIT_REWRITE max(max(V2_INIT_REWRITE, V3_INIT_REWRITE),\
+				V1_INIT_REWRITE)
+#define DQUOT_DEL_ALLOC max(V1_DEL_ALLOC, max(V2_DEL_ALLOC, V3_DEL_ALLOC))
+#define DQUOT_DEL_REWRITE max(max(V2_DEL_REWRITE, V3_DEL_REWRITE),\
+				V1_DEL_REWRITE)
 
 /*
  * Data for one user/group kept in memory
  */
 struct mem_dqblk {
-	__u32 dqb_bhardlimit;	/* absolute limit on disk blks alloc */
-	__u32 dqb_bsoftlimit;	/* preferred limit on disk blks */
+	qsize_t dqb_bhardlimit;	/* absolute limit on disk blks alloc */
+	qsize_t dqb_bsoftlimit;	/* preferred limit on disk blks */
 	qsize_t dqb_curspace;	/* current used space */
-	__u32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
-	__u32 dqb_isoftlimit;	/* preferred inode limit */
-	__u32 dqb_curinodes;	/* current # allocated inodes */
+	qsize_t dqb_ihardlimit;	/* absolute limit on allocated inodes */
+	qsize_t dqb_isoftlimit;	/* preferred inode limit */
+	qsize_t dqb_curinodes;	/* current # allocated inodes */
 	time_t dqb_btime;	/* time limit for excessive disk use */
 	time_t dqb_itime;	/* time limit for excessive inode use */
 };
@@ -172,6 +173,7 @@ struct mem_dqinfo {
 	union {
 		struct v1_mem_dqinfo v1_i;
 		struct v2_mem_dqinfo v2_i;
+		struct v3_mem_dqinfo v3_i;
 	} u;
 };
 
@@ -315,6 +317,7 @@ struct quota_module_name {
 #define INIT_QUOTA_MODULE_NAMES {\
 	{QFMT_VFS_OLD, "quota_v1"},\
 	{QFMT_VFS_V0, "quota_v2"},\
+	{QFMT_VFS_V1, "quota_v3"},\
 	{0, NULL}}
 
 #else
diff -rNpu quota.orig/include/linux/quotaio_v3.h quota/include/linux/quotaio_v3.h
--- quota.orig/include/linux/quotaio_v3.h	1970-01-01 03:00:00.000000000 +0300
+++ quota/include/linux/quotaio_v3.h	2008-02-29 19:16:26.281092724 +0300
@@ -0,0 +1,81 @@
+/*
+ *	Definitions of structures for vfsv1 quota format
+ */
+
+#ifndef _LINUX_QUOTAIO_V3_H
+#define _LINUX_QUOTAIO_V3_H
+
+#include <linux/types.h>
+#include <linux/quota.h>
+
+/*
+ * Definitions of magics and versions of current quota files
+ */
+#define V3_INITQMAGICS {\
+	0xd9c01f11,	/* USRQUOTA */\
+	0xd9c01927	/* GRPQUOTA */\
+}
+
+#define V3_INITQVERSIONS {\
+	1,		/* USRQUOTA */\
+	1		/* GRPQUOTA */\
+}
+
+/*
+ * The following structure defines the format of the disk quota file
+ * (as it appears on disk) - the file is a radix tree whose leaves point
+ * to blocks of these structures.
+ */
+struct v3_disk_dqblk {
+	__le32 dqb_id;		/* id this quota applies to */
+	__le32 dqb_padding;	/* padding field */
+	__le64 dqb_ihardlimit;	/* absolute limit on allocated inodes */
+	__le64 dqb_isoftlimit;	/* preferred inode limit */
+	__le64 dqb_curinodes;	/* current # allocated inodes */
+	__le64 dqb_bhardlimit;	/* absolute limit on disk space (in blocks) */
+	__le64 dqb_bsoftlimit;	/* preferred limit on disk space (in blocks) */
+	__le64 dqb_curspace;	/* current space occupied (in bytes) */
+	__le64 dqb_btime;	/* time limit for excessive disk use */
+	__le64 dqb_itime;	/* time limit for excessive inode use */
+};
+
+/*
+ * Here are header structures as written on disk and their in-memory copies
+ */
+/* First generic header */
+struct v3_disk_dqheader {
+	__le32 dqh_magic;	/* Magic number identifying file */
+	__le32 dqh_version;	/* File version */
+};
+
+/* Header with type and version specific information */
+struct v3_disk_dqinfo {
+	__le32 dqi_bgrace;	/* Time before block soft limit becomes hard */
+	__le32 dqi_igrace;	/* Time before inode soft limit becomes hard */
+	__le32 dqi_flags;	/* Flags for quotafile (DQF_*) */
+	__le32 dqi_blocks;	/* Number of blocks in file */
+	__le32 dqi_free_blk;	/* Number of first free block in the list */
+	__le32 dqi_free_entry;	/* Number of block with a free entry */
+};
+
+/*
+ *  Structure of header of block with quota structures. It is padded to
+ *  16 bytes so there will be space for exactly 14 quota-entries in a block
+ */
+struct v3_disk_dqdbheader {
+	__le32 dqdh_next_free;	/* Number of next block with free entry */
+	__le32 dqdh_prev_free;	/* Number of previous block with free entry */
+	__le16 dqdh_entries;	/* Number of valid entries in block */
+	__le16 dqdh_pad1;
+	__le32 dqdh_pad2;
+};
+
+#define V3_DQINFOOFF	sizeof(struct v3_disk_dqheader)
+#define V3_DQBLKSIZE_BITS	10
+#define V3_DQBLKSIZE	(1 << V3_DQBLKSIZE_BITS)
+#define V3_DQTREEOFF	1		/* Offset of tree in file in blocks */
+#define V3_DQTREEDEPTH	4		/* Depth of quota tree */
+#define V3_DQSTRINBLK	((V3_DQBLKSIZE - sizeof(struct v3_disk_dqdbheader)) / \
+			sizeof(struct v3_disk_dqblk))
+
+#endif /* _LINUX_QUOTAIO_V3_H */
diff -rNpu quota.orig/fs/ext3/super.c quota/fs/ext3/super.c
--- quota.orig/fs/ext3/super.c	2008-01-24 14:33:52.000000000 +0300
+++ quota/fs/ext3/super.c	2008-03-01 17:50:26.000000000 +0300
@@ -522,7 +522,8 @@ static inline void ext3_show_quota_optio
 
 	if (sbi->s_jquota_fmt)
 		seq_printf(seq, ",jqfmt=%s",
-		(sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0");
+		(sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold":
+		((sbi->s_jquota_fmt == QFMT_VFS_V0) ? "vfsv0" : "vfsv1"));
 
 	if (sbi->s_qf_names[USRQUOTA])
 		seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
@@ -673,7 +674,7 @@ enum {
 	Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
 	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
 	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
-	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
+	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
-	Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
+	Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, Opt_noquota,
 	Opt_grpquota
 };
@@ -719,6 +720,7 @@ static match_table_t tokens = {
 	{Opt_grpjquota, "grpjquota=%s"},
 	{Opt_jqfmt_vfsold, "jqfmt=vfsold"},
 	{Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
+	{Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
 	{Opt_grpquota, "grpquota"},
 	{Opt_noquota, "noquota"},
 	{Opt_quota, "quota"},
@@ -990,6 +992,9 @@ clear_qf_name:
 		case Opt_jqfmt_vfsv0:
 			sbi->s_jquota_fmt = QFMT_VFS_V0;
 			break;
+		case Opt_jqfmt_vfsv1:
+			sbi->s_jquota_fmt = QFMT_VFS_V1;
+			break;
 		case Opt_quota:
 		case Opt_usrquota:
 			set_opt(sbi->s_mount_opt, QUOTA);
@@ -1019,6 +1024,7 @@ clear_qf_name:
 		case Opt_offgrpjquota:
 		case Opt_jqfmt_vfsold:
 		case Opt_jqfmt_vfsv0:
+		case Opt_jqfmt_vfsv1:
 			printk(KERN_ERR
 				"EXT3-fs: journalled quota options not "
 				"supported.\n");
diff -rNpu quota.orig/fs/reiserfs/super.c quota/fs/reiserfs/super.c
--- quota.orig/fs/reiserfs/super.c	2008-01-24 14:33:52.000000000 +0300
+++ quota/fs/reiserfs/super.c	2008-03-01 17:51:12.000000000 +0300
@@ -1021,6 +1021,8 @@ static int reiserfs_parse_options(struct
 				REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_OLD;
 			else if (!strcmp(arg, "vfsv0"))
 				REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_V0;
+			else if (!strcmp(arg, "vfsv1"))
+				REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_V1;
 			else {
 				reiserfs_warning(s,
 						 "reiserfs_parse_options: unknown quota format specified.");

Signed-off-by: Andrew Perepechko <andrew.perepechko@sun.com>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] quota: 64-bit limits with vfs
  2008-03-06 13:41 [RFC] quota: 64-bit limits with vfs Andrew Perepechko
@ 2008-03-06 14:48 ` Jan Kara
  2008-03-09 22:46   ` Andrew Perepechko
  0 siblings, 1 reply; 21+ messages in thread
From: Jan Kara @ 2008-03-06 14:48 UTC (permalink / raw)
  To: Andrew Perepechko
  Cc: linux-fsdevel, Johann Lombardi, Zhiyong Landen tian,
	Alex Lyashkov

  Hello,

  Sorry for not responding for a few days. I was busy with other things.

On Thu 06-03-08 16:41:11, Andrew Perepechko wrote:
> We are in need of large (above 4 TB) block quota limits, but it seems like XFS filesystem 
> (having its own quota implementation) is the only available fs that supports them. Currently
> ext3 supports up to 8 TB of data and forthcoming ext4 will support even more.
> 
> Linux kernel has two implementations of quota format modules:
> quota_v1 (with QFMT_VFS_OLD id)
> quota_v2 (with QFMT_VFS_V0 id)
> Either uses 32-bit data types to store quota limits on disk
> (see struct v1_disk_dqblk and struct v2_disk_dqblk). Block quota limits 
> are stored in 1kb units (QUOTABLOCK_SIZE constant) which gives
> the largest possible quota limit of (2^32-1)*2^10 bytes ~ 4 TB.
> 
> In-memory quota entries representation suffers from the same 4 TB 
> limitation (see struct mem_dqblk).
> 
> The patch below adds a separate quota_v3 module which deals with 64-bit data  to solve the problem
> (another possible approach is to merge the code into quota_v2 module to reuse some amount of the code - 
> this won't reuse a lot because there're too many references to disk_dqblk structures and dependent constants).
> 
> Could you comment on the patch and the idea behind it in general?
  Just from a quick look. There seem to be actually two separate changes:
1) Change current formats so that they refuse to set quota above the threshold
they are able to handle. That's fine and we should do that (in a separate patch,
please).

2) Implement new format able to handle more than 4TB limits. In principle,
that is fine but vfsv0 format has actually been designed so that similar
changes can go mostly invisible for userspace (modulo some tools updates
etc.). Given that the format itself doesn't change that much, we definitely
do not need to introduce completely new quota format. I'd just increase the
version number. Also I'd like to avoid unnecessary code duplication. The
only thing that is really different are just the conversion routines from
disk to memory. So I'd just modify the code in fs/quota_v2.c so that it
supports both versions of the quota format - you need to parametrize macros
like GETENTRIES(), V2_DQSTRINBLK, ... (actually make inline functions of
them when we are changing it), probably make union of struct v2_disk_dqblk
including structures for both versions and change sizeof(struct
v2_disk_dqblk) to some function. But all this shouldn't be that hard to do
in a nice way...

									Honza


> ---
> 
>  fs/Kconfig                 |    7
>  fs/Makefile                |    1
>  fs/ext3/super.c            |   12
>  fs/quota_v1.c              |   50 ++-
>  fs/quota_v2.c              |   45 +-
>  fs/quota_v3.c              |  739 +++++++++++++++++++++++++++++++++++++++++++++
>  fs/reiserfs/super.c        |    2
>  include/linux/dqblk_v3.h   |   26 +
>  include/linux/quota.h      |   23 -
>  include/linux/quotaio_v3.h |   81 ++++
>  10 files changed, 952 insertions(+), 34 deletions(-)
> 
> ---
> 
> diff -rNpu quota.orig/fs/Kconfig quota/fs/Kconfig
> --- quota.orig/fs/Kconfig	2008-01-24 14:33:56.000000000 +0300
> +++ quota/fs/Kconfig	2008-02-27 16:49:56.108413855 +0300
> @@ -488,6 +488,13 @@ config QFMT_V2
>  	  This quota format allows using quotas with 32-bit UIDs/GIDs. If you
>  	  need this functionality say Y here.
>  
> +config QFMT_V3
> +	tristate "Quota format v3 support"
> +	depends on QUOTA
> +	help
> +	  This quota format allows using quotas with 32-bit UIDs/GIDs and 64-bit
> +	  limits. If you need this functionality say Y here.
> +
>  config QUOTACTL
>  	bool
>  	depends on XFS_QUOTA || QUOTA
> diff -rNpu quota.orig/fs/Makefile quota/fs/Makefile
> --- quota.orig/fs/Makefile	2008-01-24 14:33:54.000000000 +0300
> +++ quota/fs/Makefile	2008-02-27 16:50:13.436477156 +0300
> @@ -40,6 +40,7 @@ obj-$(CONFIG_GENERIC_ACL)	+= generic_acl
>  obj-$(CONFIG_QUOTA)		+= dquot.o
>  obj-$(CONFIG_QFMT_V1)		+= quota_v1.o
>  obj-$(CONFIG_QFMT_V2)		+= quota_v2.o
> +obj-$(CONFIG_QFMT_V3)		+= quota_v3.o
>  obj-$(CONFIG_QUOTACTL)		+= quota.o
>  
>  obj-$(CONFIG_DMAPI)		+= dmapi/
> diff -rNpu quota.orig/fs/quota_v1.c quota/fs/quota_v1.c
> --- quota.orig/fs/quota_v1.c	2006-03-20 08:53:29.000000000 +0300
> +++ quota/fs/quota_v1.c	2008-02-29 19:15:23.325159161 +0300
> @@ -25,8 +25,16 @@ static void v1_disk2mem_dqblk(struct mem
>  	m->dqb_btime = d->dqb_btime;
>  }
>  
> -static void v1_mem2disk_dqblk(struct v1_disk_dqblk *d, struct mem_dqblk *m)
> +static int v1_mem2disk_dqblk(struct v1_disk_dqblk *d, struct mem_dqblk *m)
>  {
> +	__u32 typelimit = ~((__u32)0);
> +
> +	if (m->dqb_ihardlimit > typelimit ||
> +	    m->dqb_isoftlimit > typelimit ||
> +	    m->dqb_bhardlimit > typelimit ||
> +	    m->dqb_bsoftlimit > typelimit)
> +		return -EINVAL;
> +
>  	d->dqb_ihardlimit = m->dqb_ihardlimit;
>  	d->dqb_isoftlimit = m->dqb_isoftlimit;
>  	d->dqb_curinodes = m->dqb_curinodes;
> @@ -35,6 +43,8 @@ static void v1_mem2disk_dqblk(struct v1_
>  	d->dqb_curblocks = toqb(m->dqb_curspace);
>  	d->dqb_itime = m->dqb_itime;
>  	d->dqb_btime = m->dqb_btime;
> +
> +	return 0;
>  }
>  
>  static int v1_read_dqblk(struct dquot *dquot)
> @@ -64,7 +74,10 @@ static int v1_commit_dqblk(struct dquot
>  	ssize_t ret;
>  	struct v1_disk_dqblk dqblk;
>  
> -	v1_mem2disk_dqblk(&dqblk, &dquot->dq_dqb);
> +	ret = v1_mem2disk_dqblk(&dqblk, &dquot->dq_dqb);
> +	if (ret < 0)
> +		return ret;
> +
>  	if (dquot->dq_id == 0) {
>  		dqblk.dqb_btime = sb_dqopt(dquot->dq_sb)->info[type].dqi_bgrace;
>  		dqblk.dqb_itime = sb_dqopt(dquot->dq_sb)->info[type].dqi_igrace;
> @@ -88,7 +101,7 @@ out:
>  	return ret;
>  }
>  
> -/* Magics of new quota format */
> +/* Magics of vfsv0 quota format */
>  #define V2_INITQMAGICS {\
>  	0xd9c01f11,     /* USRQUOTA */\
>  	0xd9c01927      /* GRPQUOTA */\
> @@ -100,15 +113,29 @@ struct v2_disk_dqheader {
>  	__le32 dqh_version;      /* File version */
>  };
>  
> +/* Magics of vfsv1 quota format */
> +#define V3_INITQMAGICS {\
> +	0xd9c01f11,     /* USRQUOTA */\
> +	0xd9c01927      /* GRPQUOTA */\
> +}
> +
> +/* Header of new quota format */
> +struct v3_disk_dqheader {
> +	__le32 dqh_magic;        /* Magic number identifying file */
> +	__le32 dqh_version;      /* File version */
> +};
> +
>  static int v1_check_quota_file(struct super_block *sb, int type)
>  {
>  	struct inode *inode = sb_dqopt(sb)->files[type];
>  	ulong blocks;
>  	size_t off; 
> -	struct v2_disk_dqheader dqhead;
> -	ssize_t size;
> +	struct v2_disk_dqheader dqhead_v2;
> +	struct v3_disk_dqheader dqhead_v3;
> +	ssize_t size_v2, size_v3;
>  	loff_t isize;
> -	static const uint quota_magics[] = V2_INITQMAGICS;
> +	static const uint quota_magics_v2[] = V2_INITQMAGICS,
> +			  quota_magics_v3[] = V3_INITQMAGICS;
>  
>  	isize = i_size_read(inode);
>  	if (!isize)
> @@ -118,10 +145,15 @@ static int v1_check_quota_file(struct su
>  	if ((blocks % sizeof(struct v1_disk_dqblk) * BLOCK_SIZE + off) % sizeof(struct v1_disk_dqblk))
>  		return 0;
>  	/* Doublecheck whether we didn't get file with new format - with old quotactl() this could happen */
> -	size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
> -	if (size != sizeof(struct v2_disk_dqheader))
> +	size_v2 = sb->s_op->quota_read(sb, type, (char *)&dqhead_v2,
> +				       sizeof(struct v2_disk_dqheader), 0);
> +	size_v3 = sb->s_op->quota_read(sb, type, (char *)&dqhead_v3,
> +				       sizeof(struct v3_disk_dqheader), 0);
> +	if (size_v2 != sizeof(struct v2_disk_dqheader) &&
> +	    size_v3 != sizeof(struct v3_disk_dqheader))
>  		return 1;	/* Probably not new format */
> -	if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type])
> +	if (le32_to_cpu(dqhead_v2.dqh_magic) != quota_magics_v2[type] &&
> +	    le32_to_cpu(dqhead_v3.dqh_magic) != quota_magics_v3[type])
>  		return 1;	/* Definitely not new format */
>  	printk(KERN_INFO "VFS: %s: Refusing to turn on old quota format on given file. It probably contains newer quota format.\n", sb->s_id);
>          return 0;		/* Seems like a new format file -> refuse it */
> diff -rNpu quota.orig/fs/quota_v2.c quota/fs/quota_v2.c
> --- quota.orig/fs/quota_v2.c	2006-03-20 08:53:29.000000000 +0300
> +++ quota/fs/quota_v2.c	2008-02-28 10:37:31.067602129 +0300
> @@ -106,17 +106,27 @@ static void disk2memdqb(struct mem_dqblk
>  	m->dqb_btime = le64_to_cpu(d->dqb_btime);
>  }
>  
> -static void mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id)
> +static int mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id)
>  {
> -	d->dqb_ihardlimit = cpu_to_le32(m->dqb_ihardlimit);
> -	d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit);
> -	d->dqb_curinodes = cpu_to_le32(m->dqb_curinodes);
> +	__u32 typelimit = ~((__u32)0);
> +
> +	if (m->dqb_ihardlimit > typelimit ||
> +	    m->dqb_isoftlimit > typelimit ||
> +	    m->dqb_bhardlimit > typelimit ||
> +	    m->dqb_bsoftlimit > typelimit)
> +		return -EINVAL;
> +
> +	d->dqb_ihardlimit = cpu_to_le32((__u32)m->dqb_ihardlimit);
> +	d->dqb_isoftlimit = cpu_to_le32((__u32)m->dqb_isoftlimit);
> +	d->dqb_curinodes = cpu_to_le32((__u32)m->dqb_curinodes);
>  	d->dqb_itime = cpu_to_le64(m->dqb_itime);
> -	d->dqb_bhardlimit = cpu_to_le32(m->dqb_bhardlimit);
> -	d->dqb_bsoftlimit = cpu_to_le32(m->dqb_bsoftlimit);
> +	d->dqb_bhardlimit = cpu_to_le32((__u32)m->dqb_bhardlimit);
> +	d->dqb_bsoftlimit = cpu_to_le32((__u32)m->dqb_bsoftlimit);
>  	d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
>  	d->dqb_btime = cpu_to_le64(m->dqb_btime);
>  	d->dqb_id = cpu_to_le32(id);
> +
> +	return 0;
>  }
>  
>  static dqbuf_t getdqbuf(void)
> @@ -394,14 +404,12 @@ static int v2_write_dquot(struct dquot *
>  	ssize_t ret;
>  	struct v2_disk_dqblk ddquot, empty;
>  
> -	/* dq_off is guarded by dqio_sem */
> -	if (!dquot->dq_off)
> -		if ((ret = dq_insert_tree(dquot)) < 0) {
> -			printk(KERN_ERR "VFS: Error %zd occurred while creating quota.\n", ret);
> -			return ret;
> -		}
>  	spin_lock(&dq_data_lock);
> -	mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id);
> +	ret = mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id);
> +	if (ret < 0) {
> +		spin_unlock(&dq_data_lock);
> +		return ret;
> +	}
>  	/* Argh... We may need to write structure full of zeroes but that would be
>  	 * treated as an empty place by the rest of the code. Format change would
>  	 * be definitely cleaner but the problems probably are not worth it */
> @@ -409,6 +417,17 @@ static int v2_write_dquot(struct dquot *
>  	if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
>  		ddquot.dqb_itime = cpu_to_le64(1);
>  	spin_unlock(&dq_data_lock);
> +
> +	/* dq_off is guarded by dqio_sem */
> +	if (!dquot->dq_off) {
> +		ret = dq_insert_tree(dquot);
> +		if (ret < 0) {
> +			printk(KERN_ERR "VFS: Error %zd occurred "
> +			       "while creating quota.\n", ret);
> +			return ret;
> +		}
> +	}
> +
>  	ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type,
>  	      (char *)&ddquot, sizeof(struct v2_disk_dqblk), dquot->dq_off);
>  	if (ret != sizeof(struct v2_disk_dqblk)) {
> diff -rNpu quota.orig/fs/quota_v3.c quota/fs/quota_v3.c
> --- quota.orig/fs/quota_v3.c	1970-01-01 03:00:00.000000000 +0300
> +++ quota/fs/quota_v3.c	2008-02-28 10:55:05.981528558 +0300
> @@ -0,0 +1,739 @@
> +/*
> + *	vfsv1 quota IO operations on file
> + *
> + *	adds support for quota limits above 4 TB
> + *
> + *	based on quota_v3.c by Jan Kara
> + */
> +
> +#include <linux/errno.h>
> +#include <linux/fs.h>
> +#include <linux/mount.h>
> +#include <linux/dqblk_v3.h>
> +#include <linux/quotaio_v3.h>
> +#include <linux/kernel.h>
> +#include <linux/init.h>
> +#include <linux/module.h>
> +#include <linux/slab.h>
> +
> +#include <asm/byteorder.h>
> +
> +MODULE_DESCRIPTION("Quota format v3 support");
> +MODULE_LICENSE("GPL");
> +
> +#define __QUOTA_V3_PARANOIA
> +
> +typedef char *dqbuf_t;
> +
> +#define GETIDINDEX(id, depth) (((id) >> ((V3_DQTREEDEPTH-(depth)-1)*8)) & 0xff)
> +#define GETENTRIES(buf) ((struct v3_disk_dqblk *) \
> +			  (((char *)buf)+sizeof(struct v3_disk_dqdbheader)))
> +
> +/* Check whether given file is really vfsv1 quotafile */
> +static int v3_check_quota_file(struct super_block *sb, int type)
> +{
> +	struct v3_disk_dqheader dqhead;
> +	ssize_t size;
> +	static const uint quota_magics[] = V3_INITQMAGICS;
> +	static const uint quota_versions[] = V3_INITQVERSIONS;
> +
> +	size = sb->s_op->quota_read(sb, type, (char *)&dqhead,
> +				    sizeof(struct v3_disk_dqheader), 0);
> +	if (size != sizeof(struct v3_disk_dqheader)) {
> +		printk(KERN_WARNING "quota_v3: failed read expected=%zd, "
> +		       "got=%zd\n", sizeof(struct v3_disk_dqheader), size);
> +		return 0;
> +	}
> +	if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
> +	    le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
> +		return 0;
> +	return 1;
> +}
> +
> +/* Read information header from quota file */
> +static int v3_read_file_info(struct super_block *sb, int type)
> +{
> +	struct v3_disk_dqinfo dinfo;
> +	struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
> +	ssize_t size;
> +
> +	size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
> +	       sizeof(struct v3_disk_dqinfo), V3_DQINFOOFF);
> +	if (size != sizeof(struct v3_disk_dqinfo)) {
> +		printk(KERN_WARNING "Can't read info structure on device %s.\n",
> +			sb->s_id);
> +		return -1;
> +	}
> +	info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
> +	info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
> +	info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
> +	info->u.v3_i.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
> +	info->u.v3_i.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
> +	info->u.v3_i.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
> +	return 0;
> +}
> +
> +/* Write information header to quota file */
> +static int v3_write_file_info(struct super_block *sb, int type)
> +{
> +	struct v3_disk_dqinfo dinfo;
> +	struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
> +	ssize_t size;
> +
> +	spin_lock(&dq_data_lock);
> +	info->dqi_flags &= ~DQF_INFO_DIRTY;
> +	dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace);
> +	dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
> +	dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
> +	spin_unlock(&dq_data_lock);
> +	dinfo.dqi_blocks = cpu_to_le32(info->u.v3_i.dqi_blocks);
> +	dinfo.dqi_free_blk = cpu_to_le32(info->u.v3_i.dqi_free_blk);
> +	dinfo.dqi_free_entry = cpu_to_le32(info->u.v3_i.dqi_free_entry);
> +	size = sb->s_op->quota_write(sb, type, (char *)&dinfo,
> +	       sizeof(struct v3_disk_dqinfo), V3_DQINFOOFF);
> +	if (size != sizeof(struct v3_disk_dqinfo)) {
> +		printk(KERN_WARNING "Can't write info structure "
> +			"on device %s.\n", sb->s_id);
> +		return -1;
> +	}
> +	return 0;
> +}
> +
> +static void disk2memdqb(struct mem_dqblk *m, struct v3_disk_dqblk *d)
> +{
> +	m->dqb_ihardlimit = le64_to_cpu(d->dqb_ihardlimit);
> +	m->dqb_isoftlimit = le64_to_cpu(d->dqb_isoftlimit);
> +	m->dqb_curinodes = le64_to_cpu(d->dqb_curinodes);
> +	m->dqb_itime = le64_to_cpu(d->dqb_itime);
> +	m->dqb_bhardlimit = le64_to_cpu(d->dqb_bhardlimit);
> +	m->dqb_bsoftlimit = le64_to_cpu(d->dqb_bsoftlimit);
> +	m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
> +	m->dqb_btime = le64_to_cpu(d->dqb_btime);
> +}
> +
> +static void mem2diskdqb(struct v3_disk_dqblk *d, struct mem_dqblk *m, qid_t id)
> +{
> +	d->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
> +	d->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
> +	d->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
> +	d->dqb_itime = cpu_to_le64(m->dqb_itime);
> +	d->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit);
> +	d->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit);
> +	d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
> +	d->dqb_btime = cpu_to_le64(m->dqb_btime);
> +	d->dqb_id = cpu_to_le32(id);
> +}
> +
> +static dqbuf_t getdqbuf(void)
> +{
> +	dqbuf_t buf = kmalloc(V3_DQBLKSIZE, GFP_NOFS);
> +	if (!buf)
> +		printk(KERN_WARNING "Not enough memory for quota buffers.\n");
> +	return buf;
> +}
> +
> +static inline void freedqbuf(dqbuf_t buf)
> +{
> +	kfree(buf);
> +}
> +
> +static inline ssize_t read_blk(struct super_block *sb, int type,
> +				uint blk, dqbuf_t buf)
> +{
> +	memset(buf, 0, V3_DQBLKSIZE);
> +	return sb->s_op->quota_read(sb, type, (char *)buf,
> +	       V3_DQBLKSIZE, blk << V3_DQBLKSIZE_BITS);
> +}
> +
> +static inline ssize_t write_blk(struct super_block *sb, int type,
> +				 uint blk, dqbuf_t buf)
> +{
> +	return sb->s_op->quota_write(sb, type, (char *)buf,
> +	       V3_DQBLKSIZE, blk << V3_DQBLKSIZE_BITS);
> +}
> +
> +/* Remove empty block from list and return it */
> +static int get_free_dqblk(struct super_block *sb, int type)
> +{
> +	dqbuf_t buf = getdqbuf();
> +	struct mem_dqinfo *info = sb_dqinfo(sb, type);
> +	struct v3_disk_dqdbheader *dh = (struct v3_disk_dqdbheader *)buf;
> +	int ret, blk;
> +
> +	if (!buf)
> +		return -ENOMEM;
> +	if (info->u.v3_i.dqi_free_blk) {
> +		blk = info->u.v3_i.dqi_free_blk;
> +		ret = read_blk(sb, type, blk, buf);
> +		if (ret < 0)
> +			goto out_buf;
> +		info->u.v3_i.dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
> +	} else {
> +		memset(buf, 0, V3_DQBLKSIZE);
> +		/* Assure block allocation... */
> +		ret = write_blk(sb, type, info->u.v3_i.dqi_blocks, buf);
> +		if (ret < 0)
> +			goto out_buf;
> +		blk = info->u.v3_i.dqi_blocks++;
> +	}
> +	mark_info_dirty(sb, type);
> +	ret = blk;
> +out_buf:
> +	freedqbuf(buf);
> +	return ret;
> +}
> +
> +/* Insert empty block to the list */
> +static int put_free_dqblk(struct super_block *sb, int type,
> +			   dqbuf_t buf, uint blk)
> +{
> +	struct mem_dqinfo *info = sb_dqinfo(sb, type);
> +	struct v3_disk_dqdbheader *dh = (struct v3_disk_dqdbheader *)buf;
> +	int err;
> +
> +	dh->dqdh_next_free = cpu_to_le32(info->u.v3_i.dqi_free_blk);
> +	dh->dqdh_prev_free = cpu_to_le32(0);
> +	dh->dqdh_entries = cpu_to_le16(0);
> +	info->u.v3_i.dqi_free_blk = blk;
> +	mark_info_dirty(sb, type);
> +	/* Some strange block. We had better leave it... */
> +	err = write_blk(sb, type, blk, buf);
> +	if (err < 0)
> +		return err;
> +	return 0;
> +}
> +
> +/* Remove given block from the list of blocks with free entries */
> +static int remove_free_dqentry(struct super_block *sb,
> +				int type, dqbuf_t buf, uint blk)
> +{
> +	dqbuf_t tmpbuf = getdqbuf();
> +	struct mem_dqinfo *info = sb_dqinfo(sb, type);
> +	struct v3_disk_dqdbheader *dh = (struct v3_disk_dqdbheader *)buf;
> +	uint nextblk = le32_to_cpu(dh->dqdh_next_free),
> +	     prevblk = le32_to_cpu(dh->dqdh_prev_free);
> +	int err;
> +
> +	if (!tmpbuf)
> +		return -ENOMEM;
> +	if (nextblk) {
> +		err = read_blk(sb, type, nextblk, tmpbuf);
> +		if (err < 0)
> +			goto out_buf;
> +		((struct v3_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = dh->dqdh_prev_free;
> +		err = write_blk(sb, type, nextblk, tmpbuf);
> +		if (err < 0)
> +			goto out_buf;
> +	}
> +	if (prevblk) {
> +		err = read_blk(sb, type, prevblk, tmpbuf);
> +		if (err < 0)
> +			goto out_buf;
> +		((struct v3_disk_dqdbheader *)tmpbuf)->dqdh_next_free = dh->dqdh_next_free;
> +		err = write_blk(sb, type, prevblk, tmpbuf);
> +		if (err < 0)
> +			goto out_buf;
> +	} else {
> +		info->u.v3_i.dqi_free_entry = nextblk;
> +		mark_info_dirty(sb, type);
> +	}
> +	freedqbuf(tmpbuf);
> +	dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
> +	/* No matter whether write succeeds block is out of list */
> +	if (write_blk(sb, type, blk, buf) < 0)
> +		printk(KERN_ERR "VFS: Can't write block (%u) "
> +			"with free entries.\n", blk);
> +	return 0;
> +out_buf:
> +	freedqbuf(tmpbuf);
> +	return err;
> +}
> +
> +/* Insert given block to the beginning of list with free entries */
> +static int insert_free_dqentry(struct super_block *sb,
> +				int type, dqbuf_t buf, uint blk)
> +{
> +	dqbuf_t tmpbuf = getdqbuf();
> +	struct mem_dqinfo *info = sb_dqinfo(sb, type);
> +	struct v3_disk_dqdbheader *dh = (struct v3_disk_dqdbheader *)buf;
> +	int err;
> +
> +	if (!tmpbuf)
> +		return -ENOMEM;
> +	dh->dqdh_next_free = cpu_to_le32(info->u.v3_i.dqi_free_entry);
> +	dh->dqdh_prev_free = cpu_to_le32(0);
> +	err = write_blk(sb, type, blk, buf);
> +	if (err < 0)
> +		goto out_buf;
> +	if (info->u.v3_i.dqi_free_entry) {
> +		err = read_blk(sb, type, info->u.v3_i.dqi_free_entry, tmpbuf);
> +		if (err < 0)
> +			goto out_buf;
> +		((struct v3_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = cpu_to_le32(blk);
> +		err = write_blk(sb, type, info->u.v3_i.dqi_free_entry, tmpbuf);
> +		if (err < 0)
> +			goto out_buf;
> +	}
> +	freedqbuf(tmpbuf);
> +	info->u.v3_i.dqi_free_entry = blk;
> +	mark_info_dirty(sb, type);
> +	return 0;
> +out_buf:
> +	freedqbuf(tmpbuf);
> +	return err;
> +}
> +
> +/* Find space for dquot */
> +static uint find_free_dqentry(struct dquot *dquot, int *err)
> +{
> +	struct super_block *sb = dquot->dq_sb;
> +	struct mem_dqinfo *info = sb_dqopt(sb)->info+dquot->dq_type;
> +	uint blk, i;
> +	struct v3_disk_dqdbheader *dh;
> +	struct v3_disk_dqblk *ddquot;
> +	struct v3_disk_dqblk fakedquot;
> +	dqbuf_t buf;
> +
> +	*err = 0;
> +	buf = getdqbuf();
> +	if (!buf) {
> +		*err = -ENOMEM;
> +		return 0;
> +	}
> +	dh = (struct v3_disk_dqdbheader *)buf;
> +	ddquot = GETENTRIES(buf);
> +	if (info->u.v3_i.dqi_free_entry) {
> +		blk = info->u.v3_i.dqi_free_entry;
> +		*err = read_blk(sb, dquot->dq_type, blk, buf);
> +		if (*err < 0)
> +			goto out_buf;
> +	} else {
> +		blk = get_free_dqblk(sb, dquot->dq_type);
> +		if ((int)blk < 0) {
> +			*err = blk;
> +			freedqbuf(buf);
> +			return 0;
> +		}
> +		memset(buf, 0, V3_DQBLKSIZE);
> +		/* This is enough as block is already zeroed */
> +		/* and entry list is empty...                */
> +		info->u.v3_i.dqi_free_entry = blk;
> +		mark_info_dirty(sb, dquot->dq_type);
> +	}
> +	if (le16_to_cpu(dh->dqdh_entries)+1 >= V3_DQSTRINBLK) {
> +		*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk);
> +		if (*err < 0) {
> +			printk(KERN_ERR "VFS: find_free_dqentry(): Can't "
> +			       "remove block (%u) from entry free list.\n",
> +			       blk);
> +			goto out_buf;
> +		}
> +	}
> +	dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)+1);
> +	memset(&fakedquot, 0, sizeof(struct v3_disk_dqblk));
> +	/* Find free structure in block */
> +	for (i = 0; i < V3_DQSTRINBLK && memcmp(&fakedquot, ddquot+i,
> +		    sizeof(struct v3_disk_dqblk)); i++);
> +#ifdef __QUOTA_V3_PARANOIA
> +	if (i == V3_DQSTRINBLK) {
> +		printk(KERN_ERR "VFS: find_free_dqentry(): "
> +		       "Data block full but it shouldn't.\n");
> +		*err = -EIO;
> +		goto out_buf;
> +	}
> +#endif
> +	*err = write_blk(sb, dquot->dq_type, blk, buf);
> +	if (*err < 0) {
> +		printk(KERN_ERR "VFS: find_free_dqentry(): "
> +		       "Can't write quota data block %u.\n", blk);
> +		goto out_buf;
> +	}
> +	dquot->dq_off = (blk<<V3_DQBLKSIZE_BITS)+
> +			sizeof(struct v3_disk_dqdbheader)+
> +			i*sizeof(struct v3_disk_dqblk);
> +	freedqbuf(buf);
> +	return blk;
> +out_buf:
> +	freedqbuf(buf);
> +	return 0;
> +}
> +
> +/* Insert reference to structure into the trie */
> +static int do_insert_tree(struct dquot *dquot, uint *treeblk, int depth)
> +{
> +	struct super_block *sb = dquot->dq_sb;
> +	dqbuf_t buf;
> +	int ret = 0, newson = 0, newact = 0;
> +	__le32 *ref;
> +	uint newblk;
> +
> +	buf = getdqbuf();
> +	if (!buf)
> +		return -ENOMEM;
> +	if (!*treeblk) {
> +		ret = get_free_dqblk(sb, dquot->dq_type);
> +		if (ret < 0)
> +			goto out_buf;
> +		*treeblk = ret;
> +		memset(buf, 0, V3_DQBLKSIZE);
> +		newact = 1;
> +	} else {
> +		ret = read_blk(sb, dquot->dq_type, *treeblk, buf);
> +		if (ret < 0) {
> +			printk(KERN_ERR "VFS: Can't read tree quota "
> +				"block %u.\n", *treeblk);
> +			goto out_buf;
> +		}
> +	}
> +	ref = (__le32 *)buf;
> +	newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
> +	if (!newblk)
> +		newson = 1;
> +	if (depth == V3_DQTREEDEPTH-1) {
> +#ifdef __QUOTA_V3_PARANOIA
> +		if (newblk) {
> +			printk(KERN_ERR "VFS: Inserting already present "
> +				"quota entry (block %u).\n",
> +			le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]));
> +			ret = -EIO;
> +			goto out_buf;
> +		}
> +#endif
> +		newblk = find_free_dqentry(dquot, &ret);
> +	} else
> +		ret = do_insert_tree(dquot, &newblk, depth+1);
> +	if (newson && ret >= 0) {
> +		ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk);
> +		ret = write_blk(sb, dquot->dq_type, *treeblk, buf);
> +	} else if (newact && ret < 0)
> +		put_free_dqblk(sb, dquot->dq_type, buf, *treeblk);
> +out_buf:
> +	freedqbuf(buf);
> +	return ret;
> +}
> +
> +/* Wrapper for inserting quota structure into tree */
> +static inline int dq_insert_tree(struct dquot *dquot)
> +{
> +	int tmp = V3_DQTREEOFF;
> +	return do_insert_tree(dquot, &tmp, 0);
> +}
> +
> +/*
> + *	We don't have to be afraid of deadlocks
> + *	as we never have quotas on quota files...
> + */
> +static int v3_write_dquot(struct dquot *dquot)
> +{
> +	int type = dquot->dq_type;
> +	ssize_t ret;
> +	struct v3_disk_dqblk ddquot, empty;
> +
> +	/* dq_off is guarded by dqio_sem */
> +	if (!dquot->dq_off) {
> +		ret = dq_insert_tree(dquot);
> +		if (ret < 0) {
> +			printk(KERN_ERR "VFS: Error %zd occurred "
> +				"while creating quota.\n", ret);
> +			return ret;
> +		}
> +	}
> +	spin_lock(&dq_data_lock);
> +	mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id);
> +	/* Argh... We may need to write structure full of zeroes but that would
> +	 * be treated as an empty place by the rest of the code. Format change
> +	 * would be definitely cleaner but the problems are not worth it */
> +	memset(&empty, 0, sizeof(struct v3_disk_dqblk));
> +	if (!memcmp(&empty, &ddquot, sizeof(struct v3_disk_dqblk)))
> +		ddquot.dqb_itime = cpu_to_le64(1);
> +	spin_unlock(&dq_data_lock);
> +	ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type,
> +	      (char *)&ddquot, sizeof(struct v3_disk_dqblk), dquot->dq_off);
> +	if (ret != sizeof(struct v3_disk_dqblk)) {
> +		printk(KERN_WARNING "VFS: dquota write failed on dev %s\n",
> +			dquot->dq_sb->s_id);
> +		if (ret >= 0)
> +			ret = -ENOSPC;
> +	} else
> +		ret = 0;
> +	dqstats.writes++;
> +
> +	return ret;
> +}
> +
> +/* Free dquot entry in data block */
> +static int free_dqentry(struct dquot *dquot, uint blk)
> +{
> +	struct super_block *sb = dquot->dq_sb;
> +	int type = dquot->dq_type;
> +	struct v3_disk_dqdbheader *dh;
> +	dqbuf_t buf = getdqbuf();
> +	int ret = 0;
> +
> +	if (!buf)
> +		return -ENOMEM;
> +	if (dquot->dq_off >> V3_DQBLKSIZE_BITS != blk) {
> +		printk(KERN_ERR "VFS: Quota structure has offset to other "
> +		  "block (%u) than it should (%u).\n", blk,
> +		  (uint)(dquot->dq_off >> V3_DQBLKSIZE_BITS));
> +		goto out_buf;
> +	}
> +	ret = read_blk(sb, type, blk, buf);
> +	if (ret < 0) {
> +		printk(KERN_ERR "VFS: Can't read quota data block %u\n", blk);
> +		goto out_buf;
> +	}
> +	dh = (struct v3_disk_dqdbheader *)buf;
> +	dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)-1);
> +	if (!le16_to_cpu(dh->dqdh_entries)) {
> +		ret = remove_free_dqentry(sb, type, buf, blk);
> +		if (ret < 0 ||
> +		    (ret = put_free_dqblk(sb, type, buf, blk)) < 0) {
> +			printk(KERN_ERR "VFS: Can't move quota data block (%u) "
> +			  "to free list.\n", blk);
> +			goto out_buf;
> +		}
> +	} else {
> +		memset(buf+(dquot->dq_off & ((1 << V3_DQBLKSIZE_BITS)-1)), 0,
> +		  sizeof(struct v3_disk_dqblk));
> +		if (le16_to_cpu(dh->dqdh_entries) == V3_DQSTRINBLK-1) {
> +			/* Insert will write block itself */
> +			ret = insert_free_dqentry(sb, type, buf, blk);
> +			if (ret < 0) {
> +				printk(KERN_ERR "VFS: Can't insert quota data "
> +				       "block (%u) to free entry list.\n", blk);
> +				goto out_buf;
> +			}
> +		} else
> +			ret = write_blk(sb, type, blk, buf);
> +			if (ret < 0) {
> +				printk(KERN_ERR "VFS: Can't write quota data "
> +				  "block %u\n", blk);
> +				goto out_buf;
> +			}
> +	}
> +	dquot->dq_off = 0;	/* Quota is now unattached */
> +out_buf:
> +	freedqbuf(buf);
> +	return ret;
> +}
> +
> +/* Remove reference to dquot from tree */
> +static int remove_tree(struct dquot *dquot, uint *blk, int depth)
> +{
> +	struct super_block *sb = dquot->dq_sb;
> +	int type = dquot->dq_type;
> +	dqbuf_t buf = getdqbuf();
> +	int ret = 0;
> +	uint newblk;
> +	__le32 *ref = (__le32 *)buf;
> +
> +	if (!buf)
> +		return -ENOMEM;
> +	ret = read_blk(sb, type, *blk, buf);
> +	if (ret < 0) {
> +		printk(KERN_ERR "VFS: Can't read quota data block %u\n", *blk);
> +		goto out_buf;
> +	}
> +	newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
> +	if (depth == V3_DQTREEDEPTH-1) {
> +		ret = free_dqentry(dquot, newblk);
> +		newblk = 0;
> +	} else
> +		ret = remove_tree(dquot, &newblk, depth+1);
> +	if (ret >= 0 && !newblk) {
> +		int i;
> +		ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(0);
> +		for (i = 0; i < V3_DQBLKSIZE && !buf[i]; i++);
> +		/* Don't put the root block into the free block list */
> +		if (i == V3_DQBLKSIZE && *blk != V3_DQTREEOFF) {
> +			put_free_dqblk(sb, type, buf, *blk);
> +			*blk = 0;
> +		} else {
> +			ret = write_blk(sb, type, *blk, buf);
> +			if (ret < 0)
> +				printk(KERN_ERR "VFS: Can't write quota tree "
> +				  "block %u.\n", *blk);
> +		}
> +	}
> +out_buf:
> +	freedqbuf(buf);
> +	return ret;
> +}
> +
> +/* Delete dquot from tree */
> +static int v3_delete_dquot(struct dquot *dquot)
> +{
> +	uint tmp = V3_DQTREEOFF;
> +
> +	if (!dquot->dq_off)	/* Even not allocated? */
> +		return 0;
> +	return remove_tree(dquot, &tmp, 0);
> +}
> +
> +/* Find entry in block */
> +static loff_t find_block_dqentry(struct dquot *dquot, uint blk)
> +{
> +	dqbuf_t buf = getdqbuf();
> +	loff_t ret = 0;
> +	int i;
> +	struct v3_disk_dqblk *ddquot = GETENTRIES(buf);
> +
> +	if (!buf)
> +		return -ENOMEM;
> +	ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf);
> +	if (ret < 0) {
> +		printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
> +		goto out_buf;
> +	}
> +	if (dquot->dq_id)
> +		for (i = 0; i < V3_DQSTRINBLK &&
> +		     le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++);
> +	else {	/* ID 0 as a bit more complicated searching... */
> +		struct v3_disk_dqblk fakedquot;
> +
> +		memset(&fakedquot, 0, sizeof(struct v3_disk_dqblk));
> +		for (i = 0; i < V3_DQSTRINBLK; i++)
> +			if (!le32_to_cpu(ddquot[i].dqb_id) &&
> +			    memcmp(&fakedquot, ddquot+i,
> +				   sizeof(struct v3_disk_dqblk)))
> +				break;
> +	}
> +	if (i == V3_DQSTRINBLK) {
> +		printk(KERN_ERR "VFS: Quota for id %u referenced "
> +		  "but not present.\n", dquot->dq_id);
> +		ret = -EIO;
> +		goto out_buf;
> +	} else
> +		ret = (blk << V3_DQBLKSIZE_BITS) + sizeof(struct
> +		  v3_disk_dqdbheader) + i * sizeof(struct v3_disk_dqblk);
> +out_buf:
> +	freedqbuf(buf);
> +	return ret;
> +}
> +
> +/* Find entry for given id in the tree */
> +static loff_t find_tree_dqentry(struct dquot *dquot, uint blk, int depth)
> +{
> +	dqbuf_t buf = getdqbuf();
> +	loff_t ret = 0;
> +	__le32 *ref = (__le32 *)buf;
> +
> +	if (!buf)
> +		return -ENOMEM;
> +	ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf);
> +	if (ret < 0) {
> +		printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
> +		goto out_buf;
> +	}
> +	ret = 0;
> +	blk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
> +	if (!blk)	/* No reference? */
> +		goto out_buf;
> +	if (depth < V3_DQTREEDEPTH-1)
> +		ret = find_tree_dqentry(dquot, blk, depth+1);
> +	else
> +		ret = find_block_dqentry(dquot, blk);
> +out_buf:
> +	freedqbuf(buf);
> +	return ret;
> +}
> +
> +/* Find entry for given id in the tree - wrapper function */
> +static inline loff_t find_dqentry(struct dquot *dquot)
> +{
> +	return find_tree_dqentry(dquot, V3_DQTREEOFF, 0);
> +}
> +
> +static int v3_read_dquot(struct dquot *dquot)
> +{
> +	int type = dquot->dq_type;
> +	loff_t offset;
> +	struct v3_disk_dqblk ddquot, empty;
> +	int ret = 0;
> +
> +#ifdef __QUOTA_V3_PARANOIA
> +	/* Invalidated quota? */
> +	if (!dquot->dq_sb || !sb_dqopt(dquot->dq_sb)->files[type]) {
> +		printk(KERN_ERR "VFS: Quota invalidated while reading!\n");
> +		return -EIO;
> +	}
> +#endif
> +	offset = find_dqentry(dquot);
> +	if (offset <= 0) {
> +		if (offset < 0)
> +			printk(KERN_ERR "VFS: Can't read quota "
> +			  "structure for id %u.\n", dquot->dq_id);
> +		dquot->dq_off = 0;
> +		set_bit(DQ_FAKE_B, &dquot->dq_flags);
> +		memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
> +		ret = offset;
> +	} else {
> +		dquot->dq_off = offset;
> +		ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
> +		      (char *)&ddquot, sizeof(struct v3_disk_dqblk), offset);
> +		if (ret != sizeof(struct v3_disk_dqblk)) {
> +			if (ret >= 0)
> +				ret = -EIO;
> +			printk(KERN_ERR "VFS: Error while reading quota "
> +			  "structure for id %u.\n", dquot->dq_id);
> +			memset(&ddquot, 0, sizeof(struct v3_disk_dqblk));
> +		} else {
> +			ret = 0;
> +			/* We need to escape back all-zero structure */
> +			memset(&empty, 0, sizeof(struct v3_disk_dqblk));
> +			empty.dqb_itime = cpu_to_le64(1);
> +			if (!memcmp(&empty, &ddquot,
> +			    sizeof(struct v3_disk_dqblk)))
> +				ddquot.dqb_itime = 0;
> +		}
> +		disk2memdqb(&dquot->dq_dqb, &ddquot);
> +		if (!dquot->dq_dqb.dqb_bhardlimit &&
> +			!dquot->dq_dqb.dqb_bsoftlimit &&
> +			!dquot->dq_dqb.dqb_ihardlimit &&
> +			!dquot->dq_dqb.dqb_isoftlimit)
> +			set_bit(DQ_FAKE_B, &dquot->dq_flags);
> +	}
> +	dqstats.reads++;
> +
> +	return ret;
> +}
> +
> +/* Check whether dquot should not be deleted. We know we are
> + * the only one operating on dquot (thanks to dq_lock) */
> +static int v3_release_dquot(struct dquot *dquot)
> +{
> +	if (test_bit(DQ_FAKE_B, &dquot->dq_flags) &&
> +	    !(dquot->dq_dqb.dqb_curinodes | dquot->dq_dqb.dqb_curspace))
> +		return v3_delete_dquot(dquot);
> +	return 0;
> +}
> +
> +static struct quota_format_ops v3_format_ops = {
> +	.check_quota_file	= v3_check_quota_file,
> +	.read_file_info		= v3_read_file_info,
> +	.write_file_info	= v3_write_file_info,
> +	.free_file_info		= NULL,
> +	.read_dqblk		= v3_read_dquot,
> +	.commit_dqblk		= v3_write_dquot,
> +	.release_dqblk		= v3_release_dquot,
> +};
> +
> +static struct quota_format_type v3_quota_format = {
> +	.qf_fmt_id	= QFMT_VFS_V1,
> +	.qf_ops		= &v3_format_ops,
> +	.qf_owner	= THIS_MODULE
> +};
> +
> +static int __init init_v3_quota_format(void)
> +{
> +	return register_quota_format(&v3_quota_format);
> +}
> +
> +static void __exit exit_v3_quota_format(void)
> +{
> +	unregister_quota_format(&v3_quota_format);
> +}
> +
> +module_init(init_v3_quota_format);
> +module_exit(exit_v3_quota_format);
> diff -rNpu quota.orig/include/linux/dqblk_v3.h quota/include/linux/dqblk_v3.h
> --- quota.orig/include/linux/dqblk_v3.h	1970-01-01 03:00:00.000000000 +0300
> +++ quota/include/linux/dqblk_v3.h	2008-02-27 16:31:45.964283988 +0300
> @@ -0,0 +1,26 @@
> +/*
> + *	Definitions of structures for vfsv1 quota format
> + */
> +
> +#ifndef _LINUX_DQBLK_V3_H
> +#define _LINUX_DQBLK_V3_H
> +
> +#include <linux/types.h>
> +
> +/* id numbers of quota format */
> +#define QFMT_VFS_V1 3
> +
> +/* Numbers of blocks needed for updates */
> +#define V3_INIT_ALLOC 4
> +#define V3_INIT_REWRITE 2
> +#define V3_DEL_ALLOC 0
> +#define V3_DEL_REWRITE 6
> +
> +/* Inmemory copy of version specific information */
> +struct v3_mem_dqinfo {
> +	unsigned int dqi_blocks;
> +	unsigned int dqi_free_blk;
> +	unsigned int dqi_free_entry;
> +};
> +
> +#endif /* _LINUX_DQBLK_V3_H */
> diff -rNpu quota.orig/include/linux/quota.h quota/include/linux/quota.h
> --- quota.orig/include/linux/quota.h	2006-03-20 08:53:29.000000000 +0300
> +++ quota/include/linux/quota.h	2008-02-27 16:30:15.306620367 +0300
> @@ -136,24 +136,27 @@ struct if_dqinfo {
>  #include <linux/dqblk_xfs.h>
>  #include <linux/dqblk_v1.h>
>  #include <linux/dqblk_v2.h>
> +#include <linux/dqblk_v3.h>
>  
>  /* Maximal numbers of writes for quota operation (insert/delete/update)
>   * (over VFS all formats) */
> -#define DQUOT_INIT_ALLOC max(V1_INIT_ALLOC, V2_INIT_ALLOC)
> -#define DQUOT_INIT_REWRITE max(V1_INIT_REWRITE, V2_INIT_REWRITE)
> -#define DQUOT_DEL_ALLOC max(V1_DEL_ALLOC, V2_DEL_ALLOC)
> -#define DQUOT_DEL_REWRITE max(V1_DEL_REWRITE, V2_DEL_REWRITE)
> +#define DQUOT_INIT_ALLOC max(V1_INIT_ALLOC, max(V2_INIT_ALLOC, V3_INIT_ALLOC))
> +#define DQUOT_INIT_REWRITE max(max(V2_INIT_REWRITE, V3_INIT_REWRITE),\
> +				V1_INIT_REWRITE)
> +#define DQUOT_DEL_ALLOC max(V1_DEL_ALLOC, max(V2_DEL_ALLOC, V3_DEL_ALLOC))
> +#define DQUOT_DEL_REWRITE max(max(V2_DEL_REWRITE, V3_DEL_REWRITE),\
> +				V1_DEL_REWRITE)
>  
>  /*
>   * Data for one user/group kept in memory
>   */
>  struct mem_dqblk {
> -	__u32 dqb_bhardlimit;	/* absolute limit on disk blks alloc */
> -	__u32 dqb_bsoftlimit;	/* preferred limit on disk blks */
> +	qsize_t dqb_bhardlimit;	/* absolute limit on disk blks alloc */
> +	qsize_t dqb_bsoftlimit;	/* preferred limit on disk blks */
>  	qsize_t dqb_curspace;	/* current used space */
> -	__u32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
> -	__u32 dqb_isoftlimit;	/* preferred inode limit */
> -	__u32 dqb_curinodes;	/* current # allocated inodes */
> +	qsize_t dqb_ihardlimit;	/* absolute limit on allocated inodes */
> +	qsize_t dqb_isoftlimit;	/* preferred inode limit */
> +	qsize_t dqb_curinodes;	/* current # allocated inodes */
>  	time_t dqb_btime;	/* time limit for excessive disk use */
>  	time_t dqb_itime;	/* time limit for excessive inode use */
>  };
> @@ -172,6 +173,7 @@ struct mem_dqinfo {
>  	union {
>  		struct v1_mem_dqinfo v1_i;
>  		struct v2_mem_dqinfo v2_i;
> +		struct v3_mem_dqinfo v3_i;
>  	} u;
>  };
>  
> @@ -315,6 +317,7 @@ struct quota_module_name {
>  #define INIT_QUOTA_MODULE_NAMES {\
>  	{QFMT_VFS_OLD, "quota_v1"},\
>  	{QFMT_VFS_V0, "quota_v2"},\
> +	{QFMT_VFS_V1, "quota_v3"},\
>  	{0, NULL}}
>  
>  #else
> diff -rNpu quota.orig/include/linux/quotaio_v3.h quota/include/linux/quotaio_v3.h
> --- quota.orig/include/linux/quotaio_v3.h	1970-01-01 03:00:00.000000000 +0300
> +++ quota/include/linux/quotaio_v3.h	2008-02-29 19:16:26.281092724 +0300
> @@ -0,0 +1,81 @@
> +/*
> + *	Definitions of structures for vfsv1 quota format
> + */
> +
> +#ifndef _LINUX_QUOTAIO_V3_H
> +#define _LINUX_QUOTAIO_V3_H
> +
> +#include <linux/types.h>
> +#include <linux/quota.h>
> +
> +/*
> + * Definitions of magics and versions of current quota files
> + */
> +#define V3_INITQMAGICS {\
> +	0xd9c01f11,	/* USRQUOTA */\
> +	0xd9c01927	/* GRPQUOTA */\
> +}
> +
> +#define V3_INITQVERSIONS {\
> +	1,		/* USRQUOTA */\
> +	1		/* GRPQUOTA */\
> +}
> +
> +/*
> + * The following structure defines the format of the disk quota file
> + * (as it appears on disk) - the file is a radix tree whose leaves point
> + * to blocks of these structures.
> + */
> +struct v3_disk_dqblk {
> +	__le32 dqb_id;		/* id this quota applies to */
> +	__le32 dqb_padding;	/* padding field */
> +	__le64 dqb_ihardlimit;	/* absolute limit on allocated inodes */
> +	__le64 dqb_isoftlimit;	/* preferred inode limit */
> +	__le64 dqb_curinodes;	/* current # allocated inodes */
> +	__le64 dqb_bhardlimit;	/* absolute limit on disk space (in blocks) */
> +	__le64 dqb_bsoftlimit;	/* preferred limit on disk space (in blocks) */
> +	__le64 dqb_curspace;	/* current space occupied (in bytes) */
> +	__le64 dqb_btime;	/* time limit for excessive disk use */
> +	__le64 dqb_itime;	/* time limit for excessive inode use */
> +};
> +
> +/*
> + * Here are header structures as written on disk and their in-memory copies
> + */
> +/* First generic header */
> +struct v3_disk_dqheader {
> +	__le32 dqh_magic;	/* Magic number identifying file */
> +	__le32 dqh_version;	/* File version */
> +};
> +
> +/* Header with type and version specific information */
> +struct v3_disk_dqinfo {
> +	__le32 dqi_bgrace;	/* Time before block soft limit becomes hard */
> +	__le32 dqi_igrace;	/* Time before inode soft limit becomes hard */
> +	__le32 dqi_flags;	/* Flags for quotafile (DQF_*) */
> +	__le32 dqi_blocks;	/* Number of blocks in file */
> +	__le32 dqi_free_blk;	/* Number of first free block in the list */
> +	__le32 dqi_free_entry;	/* Number of block with a free entry */
> +};
> +
> +/*
> + *  Structure of header of block with quota structures. It is padded to
> + *  16 bytes so there will be space for exactly 14 quota-entries in a block
> + */
> +struct v3_disk_dqdbheader {
> +	__le32 dqdh_next_free;	/* Number of next block with free entry */
> +	__le32 dqdh_prev_free;	/* Number of previous block with free entry */
> +	__le16 dqdh_entries;	/* Number of valid entries in block */
> +	__le16 dqdh_pad1;
> +	__le32 dqdh_pad2;
> +};
> +
> +#define V3_DQINFOOFF	sizeof(struct v3_disk_dqheader)
> +#define V3_DQBLKSIZE_BITS	10
> +#define V3_DQBLKSIZE	(1 << V3_DQBLKSIZE_BITS)
> +#define V3_DQTREEOFF	1		/* Offset of tree in file in blocks */
> +#define V3_DQTREEDEPTH	4		/* Depth of quota tree */
> +#define V3_DQSTRINBLK	((V3_DQBLKSIZE - sizeof(struct v3_disk_dqdbheader)) / \
> +			sizeof(struct v3_disk_dqblk))
> +
> +#endif /* _LINUX_QUOTAIO_V3_H */
> diff -rNpu quota.orig/fs/ext3/super.c quota/fs/ext3/super.c
> --- quota.orig/fs/ext3/super.c	2008-01-24 14:33:52.000000000 +0300
> +++ quota/fs/ext3/super.c	2008-03-01 17:50:26.000000000 +0300
> @@ -522,7 +522,8 @@ static inline void ext3_show_quota_optio
>  
>  	if (sbi->s_jquota_fmt)
>  		seq_printf(seq, ",jqfmt=%s",
> -		(sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0");
> +		(sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold":
> +		((sbi->s_jquota_fmt == QFMT_VFS_V0) ? "vfsv0" : "vfsv1"));
>  
>  	if (sbi->s_qf_names[USRQUOTA])
>  		seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
> @@ -673,7 +674,7 @@ enum {
>  	Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
>  	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
>  	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
> -	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
> +	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
> -	Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
> +	Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, Opt_noquota,
>  	Opt_grpquota
>  };
> @@ -719,6 +720,7 @@ static match_table_t tokens = {
>  	{Opt_grpjquota, "grpjquota=%s"},
>  	{Opt_jqfmt_vfsold, "jqfmt=vfsold"},
>  	{Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
> +	{Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
>  	{Opt_grpquota, "grpquota"},
>  	{Opt_noquota, "noquota"},
>  	{Opt_quota, "quota"},
> @@ -990,6 +992,9 @@ clear_qf_name:
>  		case Opt_jqfmt_vfsv0:
>  			sbi->s_jquota_fmt = QFMT_VFS_V0;
>  			break;
> +		case Opt_jqfmt_vfsv1:
> +			sbi->s_jquota_fmt = QFMT_VFS_V1;
> +			break;
>  		case Opt_quota:
>  		case Opt_usrquota:
>  			set_opt(sbi->s_mount_opt, QUOTA);
> @@ -1019,6 +1024,7 @@ clear_qf_name:
>  		case Opt_offgrpjquota:
>  		case Opt_jqfmt_vfsold:
>  		case Opt_jqfmt_vfsv0:
> +		case Opt_jqfmt_vfsv1:
>  			printk(KERN_ERR
>  				"EXT3-fs: journalled quota options not "
>  				"supported.\n");
> diff -rNpu quota.orig/fs/reiserfs/super.c quota/fs/reiserfs/super.c
> --- quota.orig/fs/reiserfs/super.c	2008-01-24 14:33:52.000000000 +0300
> +++ quota/fs/reiserfs/super.c	2008-03-01 17:51:12.000000000 +0300
> @@ -1021,6 +1021,8 @@ static int reiserfs_parse_options(struct
>  				REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_OLD;
>  			else if (!strcmp(arg, "vfsv0"))
>  				REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_V0;
> +			else if (!strcmp(arg, "vfsv1"))
> +				REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_V1;
>  			else {
>  				reiserfs_warning(s,
>  						 "reiserfs_parse_options: unknown quota format specified.");
> 
> Signed-off-by: Andrew Perepechko <andrew.perepechko@sun.com>
-- 
Jan Kara <jack@suse.cz>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] quota: 64-bit limits with vfs
  2008-03-06 14:48 ` Jan Kara
@ 2008-03-09 22:46   ` Andrew Perepechko
  2008-03-10 16:26     ` Jan Kara
  0 siblings, 1 reply; 21+ messages in thread
From: Andrew Perepechko @ 2008-03-09 22:46 UTC (permalink / raw)
  To: Jan Kara; +Cc: linux-fsdevel, Johann Lombardi, Zhiyong Landen tian,
	Alex Lyashkov

Hello, Jan!

Did you mean something like the following? It seems to be quite a large patch, 
as I expected, but it does keep the quota_v2.c structure.

Thanks.
Andrew

 fs/quota_v2.c              |  233 ++++++++++++++++++++++++++++++---------------
 include/linux/dqblk_v2.h   |    1
 include/linux/quota.h      |   14 +-
 include/linux/quotaio_v2.h |   34 +++++-
 4 files changed, 198 insertions(+), 84 deletions(-)

diff -rNpu quota.orig/fs/quota_v2.c quota/fs/quota_v2.c
--- quota.orig/fs/quota_v2.c	2008-01-25 01:58:37.000000000 +0300
+++ quota/fs/quota_v2.c	2008-03-10 01:20:28.000000000 +0300
@@ -23,26 +23,66 @@ MODULE_LICENSE("GPL");
 typedef char *dqbuf_t;
 
 #define GETIDINDEX(id, depth) (((id) >> ((V2_DQTREEDEPTH-(depth)-1)*8)) & 0xff)
-#define GETENTRIES(buf) ((struct v2_disk_dqblk *)(((char *)buf)+sizeof(struct v2_disk_dqdbheader)))
+#define GETENTRIES(buf) ((union v2_disk_dqblk *)(((char *)buf) + \
+			 sizeof(struct v2_disk_dqdbheader)))
 
-/* Check whether given file is really vfsv0 quotafile */
-static int v2_check_quota_file(struct super_block *sb, int type)
+static inline uint v2_dqblksz(uint rev)
+{
+	uint sz = 0; /* make the compiler happy */
+
+	switch (rev) {
+	case 0:
+		sz = sizeof(struct v2_disk_dqblk_r0);
+		break;
+	case 1:
+		sz = sizeof(struct v2_disk_dqblk_r1);
+		break;
+	default:
+		BUG();
+	}
+
+	return sz;
+}
+
+/* Number of quota entries in a block */
+static inline int v2_dqstrinblk(uint rev)
+{
+	return (V2_DQBLKSIZE-sizeof(struct v2_disk_dqdbheader))/v2_dqblksz(rev);
+}
+
+/* Get revision of a quota file, -1 if it does not look like a quota file */
+static int v2_quota_file_revision(struct super_block *sb, int type)
 {
 	struct v2_disk_dqheader dqhead;
 	ssize_t size;
 	static const uint quota_magics[] = V2_INITQMAGICS;
-	static const uint quota_versions[] = V2_INITQVERSIONS;
+	static const uint quota_versions_r0[] = V2_INITQVERSIONS_R0;
+	static const uint quota_versions_r1[] = V2_INITQVERSIONS_R1;
  
 	size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
 	if (size != sizeof(struct v2_disk_dqheader)) {
 		printk("quota_v2: failed read expected=%zd got=%zd\n",
 			sizeof(struct v2_disk_dqheader), size);
-		return 0;
+		return -1;
 	}
-	if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
-	    le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
-		return 0;
-	return 1;
+	if (le32_to_cpu(dqhead.dqh_magic) == quota_magics[type]) {
+		if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r0[type])
+			return 0;
+		if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r1[type])
+			return 1;
+	}
+	return -1;
+}
+
+/* Check whether given file is really vfsv0 quotafile */
+static int v2_check_quota_file(struct super_block *sb, int type)
+{
+	return v2_quota_file_revision(sb, type) != -1;
+}
+
+static qsize_t maxlimit(int rev)
+{
+	return (rev == 0) ? 0xffffffffULL : 0xffffffffffffffffULL;
 }
 
 /* Read information header from quota file */
@@ -51,6 +91,10 @@ static int v2_read_file_info(struct supe
 	struct v2_disk_dqinfo dinfo;
 	struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
 	ssize_t size;
+	int rev;
+
+	rev = v2_quota_file_revision(sb, type);
+	BUG_ON(rev < 0);
 
 	size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
 	       sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
@@ -65,6 +109,13 @@ static int v2_read_file_info(struct supe
 	info->u.v2_i.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
 	info->u.v2_i.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
 	info->u.v2_i.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
+
+	info->u.v2_i.dqi_revision = rev;
+	info->dqi_maxbhardlimit = maxlimit(rev);
+	info->dqi_maxbsoftlimit = maxlimit(rev);
+	info->dqi_maxihardlimit = maxlimit(rev);
+	info->dqi_maxisoftlimit = maxlimit(rev);
+
 	return 0;
 }
 
@@ -94,29 +145,47 @@ static int v2_write_file_info(struct sup
 	return 0;
 }
 
-static void disk2memdqb(struct mem_dqblk *m, struct v2_disk_dqblk *d)
-{
-	m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit);
-	m->dqb_isoftlimit = le32_to_cpu(d->dqb_isoftlimit);
-	m->dqb_curinodes = le32_to_cpu(d->dqb_curinodes);
-	m->dqb_itime = le64_to_cpu(d->dqb_itime);
-	m->dqb_bhardlimit = le32_to_cpu(d->dqb_bhardlimit);
-	m->dqb_bsoftlimit = le32_to_cpu(d->dqb_bsoftlimit);
-	m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
-	m->dqb_btime = le64_to_cpu(d->dqb_btime);
-}
-
-static void mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id)
-{
-	d->dqb_ihardlimit = cpu_to_le32(m->dqb_ihardlimit);
-	d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit);
-	d->dqb_curinodes = cpu_to_le32(m->dqb_curinodes);
-	d->dqb_itime = cpu_to_le64(m->dqb_itime);
-	d->dqb_bhardlimit = cpu_to_le32(m->dqb_bhardlimit);
-	d->dqb_bsoftlimit = cpu_to_le32(m->dqb_bsoftlimit);
-	d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
-	d->dqb_btime = cpu_to_le64(m->dqb_btime);
-	d->dqb_id = cpu_to_le32(id);
+#define DQ2MQ(v) (sizeof(v) == sizeof(__u64) ? \
+		  (qsize_t)le64_to_cpu(v) : \
+		  (qsize_t)le32_to_cpu(v))
+
+#define MQ2DQ(v, newv) (sizeof(v) == sizeof(__u64) ? \
+			(v = cpu_to_le64((__u64)newv)) : \
+			(v = cpu_to_le32((__u32)newv)))
+
+#define DQF_GET(var, rev, field) (rev == 0 ? \
+		DQ2MQ((var)->disk_dqblk_r0.field) : \
+		DQ2MQ((var)->disk_dqblk_r1.field))
+
+#define DQF_PUT(var, rev, field, val) (rev == 0 ? \
+		MQ2DQ((var)->disk_dqblk_r0.field, val) : \
+		MQ2DQ((var)->disk_dqblk_r1.field, val))
+
+void disk2memdqb(struct mem_dqblk *m, union v2_disk_dqblk *d,
+		 uint rev)
+{
+	m->dqb_ihardlimit = DQF_GET(d, rev, dqb_ihardlimit);
+	m->dqb_isoftlimit = DQF_GET(d, rev, dqb_isoftlimit);
+	m->dqb_curinodes = DQF_GET(d, rev, dqb_curinodes);
+	m->dqb_itime = DQF_GET(d, rev, dqb_itime);
+	m->dqb_bhardlimit = DQF_GET(d, rev, dqb_bhardlimit);
+	m->dqb_bsoftlimit = DQF_GET(d, rev, dqb_bsoftlimit);
+	m->dqb_curspace = DQF_GET(d, rev, dqb_curspace);
+	m->dqb_btime = DQF_GET(d, rev, dqb_btime);
+}
+
+static void mem2diskdqb(union v2_disk_dqblk *d, struct mem_dqblk *m,
+			qid_t id, uint rev)
+{
+	DQF_PUT(d, rev, dqb_ihardlimit, m->dqb_ihardlimit);
+	DQF_PUT(d, rev, dqb_isoftlimit, m->dqb_isoftlimit);
+	DQF_PUT(d, rev, dqb_curinodes, m->dqb_curinodes);
+	DQF_PUT(d, rev, dqb_itime, m->dqb_itime);
+	DQF_PUT(d, rev, dqb_bhardlimit, m->dqb_bhardlimit);
+	DQF_PUT(d, rev, dqb_bsoftlimit, m->dqb_bsoftlimit);
+	DQF_PUT(d, rev, dqb_curspace, m->dqb_curspace);
+	DQF_PUT(d, rev, dqb_btime, m->dqb_btime);
+	DQF_PUT(d, rev, dqb_id, id);
 }
 
 static dqbuf_t getdqbuf(void)
@@ -268,10 +337,11 @@ static uint find_free_dqentry(struct dqu
 {
 	struct super_block *sb = dquot->dq_sb;
 	struct mem_dqinfo *info = sb_dqopt(sb)->info+dquot->dq_type;
-	uint blk, i;
+	uint blk, i, rev = info->u.v2_i.dqi_revision, 
+	     dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev);
 	struct v2_disk_dqdbheader *dh;
-	struct v2_disk_dqblk *ddquot;
-	struct v2_disk_dqblk fakedquot;
+	union v2_disk_dqblk *ddquot;
+	union v2_disk_dqblk fakedquot;
 	dqbuf_t buf;
 
 	*err = 0;
@@ -298,17 +368,19 @@ static uint find_free_dqentry(struct dqu
 		info->u.v2_i.dqi_free_entry = blk;
 		mark_info_dirty(sb, dquot->dq_type);
 	}
-	if (le16_to_cpu(dh->dqdh_entries)+1 >= V2_DQSTRINBLK)	/* Block will be full? */
+	/* Block will be full? */
+	if (le16_to_cpu(dh->dqdh_entries)+1 >= dqstrinblk)
 		if ((*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) {
 			printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
 			goto out_buf;
 		}
 	dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)+1);
-	memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
+	memset(&fakedquot, 0, dqblksz);
 	/* Find free structure in block */
-	for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++);
+	for (i = 0; i < dqstrinblk && memcmp(&fakedquot,
+			ddquot+i, dqblksz); i++);
 #ifdef __QUOTA_V2_PARANOIA
-	if (i == V2_DQSTRINBLK) {
+	if (i == dqstrinblk) {
 		printk(KERN_ERR "VFS: find_free_dqentry(): Data block full but it shouldn't.\n");
 		*err = -EIO;
 		goto out_buf;
@@ -318,7 +390,8 @@ static uint find_free_dqentry(struct dqu
 		printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota data block %u.\n", blk);
 		goto out_buf;
 	}
-	dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+sizeof(struct v2_disk_dqdbheader)+i*sizeof(struct v2_disk_dqblk);
+	dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+
+			sizeof(struct v2_disk_dqdbheader)+i*dqblksz;
 	freedqbuf(buf);
 	return blk;
 out_buf:
@@ -392,7 +465,10 @@ static int v2_write_dquot(struct dquot *
 {
 	int type = dquot->dq_type;
 	ssize_t ret;
-	struct v2_disk_dqblk ddquot, empty;
+	union v2_disk_dqblk ddquot, empty;
+	struct super_block *sb = dquot->dq_sb;
+	uint rev = sb_dqopt(sb)->info[type].u.v2_i.dqi_revision;
+	uint dqblksz = v2_dqblksz(rev);
 
 	/* dq_off is guarded by dqio_mutex */
 	if (!dquot->dq_off)
@@ -401,18 +477,19 @@ static int v2_write_dquot(struct dquot *
 			return ret;
 		}
 	spin_lock(&dq_data_lock);
-	mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id);
+	mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id, rev);
 	/* Argh... We may need to write structure full of zeroes but that would be
 	 * treated as an empty place by the rest of the code. Format change would
 	 * be definitely cleaner but the problems probably are not worth it */
-	memset(&empty, 0, sizeof(struct v2_disk_dqblk));
-	if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
-		ddquot.dqb_itime = cpu_to_le64(1);
+	memset(&empty, 0, dqblksz);
+	if (!memcmp(&empty, &ddquot, dqblksz))
+		DQF_PUT(&ddquot, rev, dqb_itime, 1);
 	spin_unlock(&dq_data_lock);
-	ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type,
-	      (char *)&ddquot, sizeof(struct v2_disk_dqblk), dquot->dq_off);
-	if (ret != sizeof(struct v2_disk_dqblk)) {
-		printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", dquot->dq_sb->s_id);
+	ret = sb->s_op->quota_write(sb, type,
+	      (char *)&ddquot, dqblksz, dquot->dq_off);
+	if (ret != dqblksz) {
+		printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", 
+			sb->s_id);
 		if (ret >= 0)
 			ret = -ENOSPC;
 	}
@@ -431,6 +508,7 @@ static int free_dqentry(struct dquot *dq
 	struct v2_disk_dqdbheader *dh;
 	dqbuf_t buf = getdqbuf();
 	int ret = 0;
+	uint rev = sb_dqopt(sb)->info[type].u.v2_i.dqi_revision;
 
 	if (!buf)
 		return -ENOMEM;
@@ -456,8 +534,8 @@ static int free_dqentry(struct dquot *dq
 	}
 	else {
 		memset(buf+(dquot->dq_off & ((1 << V2_DQBLKSIZE_BITS)-1)), 0,
-		  sizeof(struct v2_disk_dqblk));
-		if (le16_to_cpu(dh->dqdh_entries) == V2_DQSTRINBLK-1) {
+		  v2_dqblksz(rev));
+		if (le16_to_cpu(dh->dqdh_entries) == v2_dqstrinblk(rev)-1) {
 			/* Insert will write block itself */
 			if ((ret = insert_free_dqentry(sb, type, buf, blk)) < 0) {
 				printk(KERN_ERR "VFS: Can't insert quota data block (%u) to free entry list.\n", blk);
@@ -535,27 +613,33 @@ static loff_t find_block_dqentry(struct
 	dqbuf_t buf = getdqbuf();
 	loff_t ret = 0;
 	int i;
-	struct v2_disk_dqblk *ddquot = GETENTRIES(buf);
+	union v2_disk_dqblk *ddquot = GETENTRIES(buf);
+	struct super_block *sb = dquot->dq_sb;
+	int type = dquot->dq_type;
+	uint rev = sb_dqopt(sb)->info[type].u.v2_i.dqi_revision;
+	uint dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev);
 
 	if (!buf)
 		return -ENOMEM;
-	if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
+
+	ret = read_blk(sb, type, blk, buf);
+	if (ret < 0) {
 		printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
 		goto out_buf;
 	}
 	if (dquot->dq_id)
-		for (i = 0; i < V2_DQSTRINBLK &&
-		     le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++);
+		for (i = 0; i < dqstrinblk &&
+		     DQF_GET(ddquot+i, rev, dqb_id) != dquot->dq_id; i++);
 	else {	/* ID 0 as a bit more complicated searching... */
-		struct v2_disk_dqblk fakedquot;
+		union v2_disk_dqblk fakedquot;
 
-		memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
-		for (i = 0; i < V2_DQSTRINBLK; i++)
-			if (!le32_to_cpu(ddquot[i].dqb_id) &&
-			    memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)))
+		memset(&fakedquot, 0, dqblksz);
+		for (i = 0; i < dqstrinblk; i++)
+			if (!DQF_GET(ddquot+i, rev, dqb_id) &&
+			    memcmp(&fakedquot, ddquot+i, dqblksz))
 				break;
 	}
-	if (i == V2_DQSTRINBLK) {
+	if (i == dqstrinblk) {
 		printk(KERN_ERR "VFS: Quota for id %u referenced "
 		  "but not present.\n", dquot->dq_id);
 		ret = -EIO;
@@ -563,7 +647,7 @@ static loff_t find_block_dqentry(struct
 	}
 	else
 		ret = (blk << V2_DQBLKSIZE_BITS) + sizeof(struct
-		  v2_disk_dqdbheader) + i * sizeof(struct v2_disk_dqblk);
+		  v2_disk_dqdbheader) + i * dqblksz;
 out_buf:
 	freedqbuf(buf);
 	return ret;
@@ -605,12 +689,13 @@ static int v2_read_dquot(struct dquot *d
 {
 	int type = dquot->dq_type;
 	loff_t offset;
-	struct v2_disk_dqblk ddquot, empty;
+	union v2_disk_dqblk ddquot, empty;
 	int ret = 0;
+	struct super_block *sb = dquot->dq_sb;
 
 #ifdef __QUOTA_V2_PARANOIA
 	/* Invalidated quota? */
-	if (!dquot->dq_sb || !sb_dqopt(dquot->dq_sb)->files[type]) {
+	if (!sb || !sb_dqopt(sb)->files[type]) {
 		printk(KERN_ERR "VFS: Quota invalidated while reading!\n");
 		return -EIO;
 	}
@@ -626,25 +711,27 @@ static int v2_read_dquot(struct dquot *d
 		ret = offset;
 	}
 	else {
+		uint rev = sb_dqopt(sb)->info[type].u.v2_i.dqi_revision,
+		     dqblksz = v2_dqblksz(rev);
 		dquot->dq_off = offset;
-		if ((ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
-		    (char *)&ddquot, sizeof(struct v2_disk_dqblk), offset))
-		    != sizeof(struct v2_disk_dqblk)) {
+		ret = sb->s_op->quota_read(sb, type, (char *)&ddquot, 
+					   dqblksz, offset);
+		if (ret != dqblksz) {
 			if (ret >= 0)
 				ret = -EIO;
 			printk(KERN_ERR "VFS: Error while reading quota "
 			  "structure for id %u.\n", dquot->dq_id);
-			memset(&ddquot, 0, sizeof(struct v2_disk_dqblk));
+			memset(&ddquot, 0, dqblksz);
 		}
 		else {
 			ret = 0;
 			/* We need to escape back all-zero structure */
-			memset(&empty, 0, sizeof(struct v2_disk_dqblk));
-			empty.dqb_itime = cpu_to_le64(1);
-			if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
-				ddquot.dqb_itime = 0;
+			memset(&empty, 0, dqblksz);
+			DQF_PUT(&empty, rev, dqb_itime, 1);
+			if (!memcmp(&empty, &ddquot, dqblksz))
+				DQF_PUT(&ddquot, rev, dqb_itime, 0);
 		}
-		disk2memdqb(&dquot->dq_dqb, &ddquot);
+		disk2memdqb(&dquot->dq_dqb, &ddquot, rev);
 		if (!dquot->dq_dqb.dqb_bhardlimit &&
 			!dquot->dq_dqb.dqb_bsoftlimit &&
 			!dquot->dq_dqb.dqb_ihardlimit &&
diff -rNpu quota.orig/include/linux/dqblk_v2.h quota/include/linux/dqblk_v2.h
--- quota.orig/include/linux/dqblk_v2.h	2008-01-25 01:58:37.000000000 +0300
+++ quota/include/linux/dqblk_v2.h	2008-03-08 22:27:02.000000000 +0300
@@ -21,6 +21,7 @@ struct v2_mem_dqinfo {
 	unsigned int dqi_blocks;
 	unsigned int dqi_free_blk;
 	unsigned int dqi_free_entry;
+	unsigned int dqi_revision;
 };
 
 #endif /* _LINUX_DQBLK_V2_H */
diff -rNpu quota.orig/include/linux/quota.h quota/include/linux/quota.h
--- quota.orig/include/linux/quota.h	2008-01-25 01:58:37.000000000 +0300
+++ quota/include/linux/quota.h	2008-03-09 22:38:31.000000000 +0300
@@ -181,12 +181,12 @@ extern spinlock_t dq_data_lock;
  * Data for one user/group kept in memory
  */
 struct mem_dqblk {
-	__u32 dqb_bhardlimit;	/* absolute limit on disk blks alloc */
-	__u32 dqb_bsoftlimit;	/* preferred limit on disk blks */
+	qsize_t dqb_bhardlimit;	/* absolute limit on disk blks alloc */
+	qsize_t dqb_bsoftlimit;	/* preferred limit on disk blks */
 	qsize_t dqb_curspace;	/* current used space */
-	__u32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
-	__u32 dqb_isoftlimit;	/* preferred inode limit */
-	__u32 dqb_curinodes;	/* current # allocated inodes */
+	qsize_t dqb_ihardlimit;	/* absolute limit on allocated inodes */
+	qsize_t dqb_isoftlimit;	/* preferred inode limit */
+	qsize_t dqb_curinodes;	/* current # allocated inodes */
 	time_t dqb_btime;	/* time limit for excessive disk use */
 	time_t dqb_itime;	/* time limit for excessive inode use */
 };
@@ -202,6 +202,10 @@ struct mem_dqinfo {
 	unsigned long dqi_flags;
 	unsigned int dqi_bgrace;
 	unsigned int dqi_igrace;
+	qsize_t dqi_maxbhardlimit;
+	qsize_t dqi_maxbsoftlimit;
+	qsize_t dqi_maxihardlimit;
+	qsize_t dqi_maxisoftlimit;
 	union {
 		struct v1_mem_dqinfo v1_i;
 		struct v2_mem_dqinfo v2_i;
diff -rNpu quota.orig/include/linux/quotaio_v2.h quota/include/linux/quotaio_v2.h
--- quota.orig/include/linux/quotaio_v2.h	2008-01-25 01:58:37.000000000 +0300
+++ quota/include/linux/quotaio_v2.h	2008-03-09 20:34:42.000000000 +0300
@@ -16,28 +16,51 @@
 	0xd9c01927	/* GRPQUOTA */\
 }
 
-#define V2_INITQVERSIONS {\
+#define V2_INITQVERSIONS_R0 {\
 	0,		/* USRQUOTA */\
 	0		/* GRPQUOTA */\
 }
 
+#define V2_INITQVERSIONS_R1 {\
+	1,		/* USRQUOTA */\
+	1		/* GRPQUOTA */\
+}
+
 /*
  * The following structure defines the format of the disk quota file
  * (as it appears on disk) - the file is a radix tree whose leaves point
  * to blocks of these structures.
  */
-struct v2_disk_dqblk {
+struct v2_disk_dqblk_r0 {
 	__le32 dqb_id;		/* id this quota applies to */
 	__le32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
 	__le32 dqb_isoftlimit;	/* preferred inode limit */
 	__le32 dqb_curinodes;	/* current # allocated inodes */
-	__le32 dqb_bhardlimit;	/* absolute limit on disk space (in QUOTABLOCK_SIZE) */
-	__le32 dqb_bsoftlimit;	/* preferred limit on disk space (in QUOTABLOCK_SIZE) */
+	__le32 dqb_bhardlimit;	/* absolute limit on disk space */
+	__le32 dqb_bsoftlimit;	/* preferred limit on disk space */
+	__le64 dqb_curspace;	/* current space occupied (in bytes) */
+	__le64 dqb_btime;	/* time limit for excessive disk use */
+	__le64 dqb_itime;	/* time limit for excessive inode use */
+};
+
+struct v2_disk_dqblk_r1 {
+	__le32 dqb_id;		/* id this quota applies to */
+	__le32 dqb_padding;	/* padding field */
+	__le64 dqb_ihardlimit;	/* absolute limit on allocated inodes */
+	__le64 dqb_isoftlimit;	/* preferred inode limit */
+	__le64 dqb_curinodes;	/* current # allocated inodes */
+	__le64 dqb_bhardlimit;	/* absolute limit on disk space */
+	__le64 dqb_bsoftlimit;	/* preferred limit on disk space */
 	__le64 dqb_curspace;	/* current space occupied (in bytes) */
 	__le64 dqb_btime;	/* time limit for excessive disk use */
 	__le64 dqb_itime;	/* time limit for excessive inode use */
 };
 
+union v2_disk_dqblk {
+	struct v2_disk_dqblk_r0 disk_dqblk_r0;
+	struct v2_disk_dqblk_r1 disk_dqblk_r1;
+};
+
 /*
  * Here are header structures as written on disk and their in-memory copies
  */
@@ -59,7 +82,7 @@ struct v2_disk_dqinfo {
 
 /*
  *  Structure of header of block with quota structures. It is padded to 16 bytes so
- *  there will be space for exactly 21 quota-entries in a block
+ *  there will be space for exactly 21 (r0) or 14 (r1) quota-entries in a block
  */
 struct v2_disk_dqdbheader {
 	__le32 dqdh_next_free;	/* Number of next block with free entry */
@@ -74,6 +97,5 @@ struct v2_disk_dqdbheader {
 #define V2_DQBLKSIZE	(1 << V2_DQBLKSIZE_BITS)	/* Size of block with quota structures */
 #define V2_DQTREEOFF	1		/* Offset of tree in file in blocks */
 #define V2_DQTREEDEPTH	4		/* Depth of quota tree */
-#define V2_DQSTRINBLK	((V2_DQBLKSIZE - sizeof(struct v2_disk_dqdbheader)) / sizeof(struct v2_disk_dqblk))	/* Number of entries in one blocks */
 
 #endif /* _LINUX_QUOTAIO_V2_H */


On Thursday 06 March 2008 17:48:24 Jan Kara wrote:
>   Hello,
> 
>   Sorry for not responding for a few days. I was busy with other things.
> 
> On Thu 06-03-08 16:41:11, Andrew Perepechko wrote:
> > We are in need of large (above 4 TB) block quota limits, but it seems like XFS filesystem 
> > (having its own quota implementation) is the only available fs that supports them. Currently
> > ext3 supports up to 8 TB of data and forthcoming ext4 will support even more.
> > 
> > Linux kernel has two implementations of quota format modules:
> > quota_v1 (with QFMT_VFS_OLD id)
> > quota_v2 (with QFMT_VFS_V0 id)
> > Either uses 32-bit data types to store quota limits on disk
> > (see struct v1_disk_dqblk and struct v2_disk_dqblk). Block quota limits 
> > are stored in 1kb units (QUOTABLOCK_SIZE constant) which gives
> > the largest possible quota limit of (2^32-1)*2^10 bytes ~ 4 TB.
> > 
> > In-memory quota entries representation suffers from the same 4 TB 
> > limitation (see struct mem_dqblk).
> > 
> > The patch below adds a separate quota_v3 module which deals with 64-bit data  to solve the problem
> > (another possible approach is to merge the code into quota_v2 module to reuse some amount of the code - 
> > this won't reuse a lot because there're too many references to disk_dqblk structures and dependent constants).
> > 
> > Could you comment on the patch and the idea behind it in general?
>   Just from a quick look. There seem to be actually two separate changes:
> 1) Change current formats so that they refuse to set quota above treshold they
> are able to handle. That's fine a we should do that (in a separate patch,
> please).
> 
> 2) Implement new format able to handle more that 4TB limits. In principle,
> that is fine but vfsv0 format has actually been designed so that similar
> changes can go mostly invisible for userspace (modulo some tools updates
> etc.). Given that the format itself doesn't change that much, we definitely
> do not need to introduce completely new quota format. I'd just increase the
> version number. Also I'd like to avoid unnecessary code duplication. The
> only thing that is really different are just the conversion routines from
> disk to memory. So I'd just modify the code in fs/quota_v2.c so that it
> supports both versions of the quota format - you need to parametrize macros
> like GETENTRIES(), V2_DQSTRINBLK, ... (actually make inline functions of
> them when we are changing it), probably make union of struct v2_disk_dqblk
> including structures for both versions and change sizeof(struct
> v2_disk_dqblk) to some function. But all this shouldn't be that hard to do
> in a nice way...
> 
> 									Honza


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] quota: 64-bit limits with vfs
  2008-03-09 22:46   ` Andrew Perepechko
@ 2008-03-10 16:26     ` Jan Kara
  2008-03-10 17:13       ` Andrew Perepechko
  0 siblings, 1 reply; 21+ messages in thread
From: Jan Kara @ 2008-03-10 16:26 UTC (permalink / raw)
  To: Andrew Perepechko
  Cc: linux-fsdevel, Johann Lombardi, Zhiyong Landen tian,
	Alex Lyashkov

  Hello Andrew,

On Mon 10-03-08 01:46:02, Andrew Perepechko wrote:
> Did you mean something like the following? It seems to be quite a large patch 
> as I expected, but it does keep quota_v2.c structure.
  Yes, that's exactly what I meant. Thanks for writing this. Below in the
patch are some minor comments. After fixing those I think I can take the
patch.

									Honza

> diff -rNpu quota.orig/fs/quota_v2.c quota/fs/quota_v2.c
> --- quota.orig/fs/quota_v2.c	2008-01-25 01:58:37.000000000 +0300
> +++ quota/fs/quota_v2.c	2008-03-10 01:20:28.000000000 +0300
> @@ -23,26 +23,66 @@ MODULE_LICENSE("GPL");
>  typedef char *dqbuf_t;
>  
>  #define GETIDINDEX(id, depth) (((id) >> ((V2_DQTREEDEPTH-(depth)-1)*8)) & 0xff)
> -#define GETENTRIES(buf) ((struct v2_disk_dqblk *)(((char *)buf)+sizeof(struct v2_disk_dqdbheader)))
> +#define GETENTRIES(buf) ((union v2_disk_dqblk *)(((char *)buf) + \
> +			 sizeof(struct v2_disk_dqdbheader)))
>  
> -/* Check whether given file is really vfsv0 quotafile */
> -static int v2_check_quota_file(struct super_block *sb, int type)
> +static inline uint v2_dqblksz(uint rev)
> +{
> +	uint sz = 0; /* make the compiler happy */
> +
> +	switch (rev) {
> +	case 0:
> +		sz = sizeof(struct v2_disk_dqblk_r0);
> +		break;
> +	case 1:
> +		sz = sizeof(struct v2_disk_dqblk_r1);
> +		break;
> +	default:
> +		BUG();
> +	}
> +
> +	return sz;
> +}
> +
> +/* Number of quota entries in a block */
> +static inline int v2_dqstrinblk(uint rev)
> +{
> +	return (V2_DQBLKSIZE-sizeof(struct v2_disk_dqdbheader))/v2_dqblksz(rev);
> +}
> +
> +/* Get revision of a quota file, -1 if it does not look a quota file */
> +static int v2_quota_file_revision(struct super_block *sb, int type)
>  {
>  	struct v2_disk_dqheader dqhead;
>  	ssize_t size;
>  	static const uint quota_magics[] = V2_INITQMAGICS;
> -	static const uint quota_versions[] = V2_INITQVERSIONS;
> +	static const uint quota_versions_r0[] = V2_INITQVERSIONS_R0;
> +	static const uint quota_versions_r1[] = V2_INITQVERSIONS_R1;
>   
>  	size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
>  	if (size != sizeof(struct v2_disk_dqheader)) {
>  		printk("quota_v2: failed read expected=%zd got=%zd\n",
>  			sizeof(struct v2_disk_dqheader), size);
> -		return 0;
> +		return -1;
>  	}
> -	if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
> -	    le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
> -		return 0;
> -	return 1;
> +	if (le32_to_cpu(dqhead.dqh_magic) == quota_magics[type]) {
> +		if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r0[type])
> +			return 0;
> +		if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r1[type])
> +			return 1;
> +	}
> +	return -1;
> +}
> +
> +/* Check whether given file is really vfsv0 quotafile */
> +static int v2_check_quota_file(struct super_block *sb, int type)
> +{
> +	return v2_quota_file_revision(sb, type) != -1;
> +}
> +
> +static qsize_t maxlimit(int rev)
> +{
> +	return (rev == 0) ? 0xffffffffULL : 0xffffffffffffffffULL;
  Hmm, since we use this function just in v2_read_file_info and it doesn't
make much sense to me to combine inode and block limits, I'd just remove
this function and do something like:

  if (rev == 0) {
	info->dqi_maxbhardlimit = 0xffffffffULL;
        ...
  }
  else {
	info->dqi_maxbhardlimit =  0xffffffffffffffffULL;
        ...
  }

  BTW: It probably doesn't make sense to have different limits for
hardlimit and softlimit, so I'd just use two limits:
  maxblimit and maxilimit.

>  }
>  
>  /* Read information header from quota file */
> @@ -51,6 +91,10 @@ static int v2_read_file_info(struct supe
>  	struct v2_disk_dqinfo dinfo;
>  	struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
>  	ssize_t size;
> +	int rev;
> +
> +	rev = v2_quota_file_revision(sb, type);
> +	BUG_ON(rev < 0);
>  
>  	size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
>  	       sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
> @@ -65,6 +109,13 @@ static int v2_read_file_info(struct supe
>  	info->u.v2_i.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
>  	info->u.v2_i.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
>  	info->u.v2_i.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
> +
> +	info->u.v2_i.dqi_revision = rev;
> +	info->dqi_maxbhardlimit = maxlimit(rev);
> +	info->dqi_maxbsoftlimit = maxlimit(rev);
> +	info->dqi_maxihardlimit = maxlimit(rev);
> +	info->dqi_maxisoftlimit = maxlimit(rev);
> +
  Please see above.

>  	return 0;
>  }
>  
> @@ -94,29 +145,47 @@ static int v2_write_file_info(struct sup
>  	return 0;
>  }
>  
> -static void disk2memdqb(struct mem_dqblk *m, struct v2_disk_dqblk *d)
> -{
> -	m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit);
> -	m->dqb_isoftlimit = le32_to_cpu(d->dqb_isoftlimit);
> -	m->dqb_curinodes = le32_to_cpu(d->dqb_curinodes);
> -	m->dqb_itime = le64_to_cpu(d->dqb_itime);
> -	m->dqb_bhardlimit = le32_to_cpu(d->dqb_bhardlimit);
> -	m->dqb_bsoftlimit = le32_to_cpu(d->dqb_bsoftlimit);
> -	m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
> -	m->dqb_btime = le64_to_cpu(d->dqb_btime);
> -}
> -
> -static void mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id)
> -{
> -	d->dqb_ihardlimit = cpu_to_le32(m->dqb_ihardlimit);
> -	d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit);
> -	d->dqb_curinodes = cpu_to_le32(m->dqb_curinodes);
> -	d->dqb_itime = cpu_to_le64(m->dqb_itime);
> -	d->dqb_bhardlimit = cpu_to_le32(m->dqb_bhardlimit);
> -	d->dqb_bsoftlimit = cpu_to_le32(m->dqb_bsoftlimit);
> -	d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
> -	d->dqb_btime = cpu_to_le64(m->dqb_btime);
> -	d->dqb_id = cpu_to_le32(id);
> +#define DQ2MQ(v) (sizeof(v) == sizeof(__u64) ? \
> +		  (qsize_t)le64_to_cpu(v) : \
> +		  (qsize_t)le32_to_cpu(v))
> +
> +#define MQ2DQ(v, newv) (sizeof(v) == sizeof(__u64) ? \
> +			(v = cpu_to_le64((__u64)newv)) : \
> +			(v = cpu_to_le32((__u32)newv)))
> +
> +#define DQF_GET(var, rev, field) (rev == 0 ? \
> +		DQ2MQ((var)->disk_dqblk_r0.field) : \
> +		DQ2MQ((var)->disk_dqblk_r1.field))
> +
> +#define DQF_PUT(var, rev, field, val) (rev == 0 ? \
> +		MQ2DQ((var)->disk_dqblk_r0.field, val) : \
> +		MQ2DQ((var)->disk_dqblk_r1.field, val))
  Actually, these macros will do the right thing for dqb_id only by sheer
luck and they won't work for dqb_curspace, dqb_itime and dqb_btime.
  Please just get rid of them, they aren't very nice anyway. In disk2memdqb()
and mem2diskdqb() just do

  if (rev == 0) {
	conversions..
  }
  else if (rev == 1) {
	conversions..
  }
  else {
	BUG();
  }

  And for the fields dqb_id and dqb_itime, which we also use in other places,
please introduce functions get_dqb_id() and get/set_dqb_itime().

> +
> +void disk2memdqb(struct mem_dqblk *m, union v2_disk_dqblk *d,
> +		 uint rev)
> +{
> +	m->dqb_ihardlimit = DQF_GET(d, rev, dqb_ihardlimit);
> +	m->dqb_isoftlimit = DQF_GET(d, rev, dqb_isoftlimit);
> +	m->dqb_curinodes = DQF_GET(d, rev, dqb_curinodes);
> +	m->dqb_itime = DQF_GET(d, rev, dqb_itime);
> +	m->dqb_bhardlimit = DQF_GET(d, rev, dqb_bhardlimit);
> +	m->dqb_bsoftlimit = DQF_GET(d, rev, dqb_bsoftlimit);
> +	m->dqb_curspace = DQF_GET(d, rev, dqb_curspace);
> +	m->dqb_btime = DQF_GET(d, rev, dqb_btime);
> +}
> +
> +static void mem2diskdqb(union v2_disk_dqblk *d, struct mem_dqblk *m,
> +			qid_t id, uint rev)
> +{
> +	DQF_PUT(d, rev, dqb_ihardlimit, m->dqb_ihardlimit);
> +	DQF_PUT(d, rev, dqb_isoftlimit, m->dqb_isoftlimit);
> +	DQF_PUT(d, rev, dqb_curinodes, m->dqb_curinodes);
> +	DQF_PUT(d, rev, dqb_itime, m->dqb_itime);
> +	DQF_PUT(d, rev, dqb_bhardlimit, m->dqb_bhardlimit);
> +	DQF_PUT(d, rev, dqb_bsoftlimit, m->dqb_bsoftlimit);
> +	DQF_PUT(d, rev, dqb_curspace, m->dqb_curspace);
> +	DQF_PUT(d, rev, dqb_btime, m->dqb_btime);
> +	DQF_PUT(d, rev, dqb_id, id);
>  }
>  
>  static dqbuf_t getdqbuf(void)
> @@ -268,10 +337,11 @@ static uint find_free_dqentry(struct dqu
>  {
>  	struct super_block *sb = dquot->dq_sb;
>  	struct mem_dqinfo *info = sb_dqopt(sb)->info+dquot->dq_type;
> -	uint blk, i;
> +	uint blk, i, rev = info->u.v2_i.dqi_revision, 
> +	     dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev);
     ^^^
Please just do another uint declaration so that we don't have to wrap
lines.

>  	struct v2_disk_dqdbheader *dh;
> -	struct v2_disk_dqblk *ddquot;
> -	struct v2_disk_dqblk fakedquot;
> +	union v2_disk_dqblk *ddquot;
> +	union v2_disk_dqblk fakedquot;
>  	dqbuf_t buf;
>  
>  	*err = 0;
> @@ -298,17 +368,19 @@ static uint find_free_dqentry(struct dqu
>  		info->u.v2_i.dqi_free_entry = blk;
>  		mark_info_dirty(sb, dquot->dq_type);
>  	}
> -	if (le16_to_cpu(dh->dqdh_entries)+1 >= V2_DQSTRINBLK)	/* Block will be full? */
> +	/* Block will be full? */
> +	if (le16_to_cpu(dh->dqdh_entries)+1 >= dqstrinblk)
>  		if ((*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) {
>  			printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
>  			goto out_buf;
>  		}
>  	dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)+1);
> -	memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
> +	memset(&fakedquot, 0, dqblksz);
>  	/* Find free structure in block */
> -	for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++);
> +	for (i = 0; i < dqstrinblk && memcmp(&fakedquot,
> +			ddquot+i, dqblksz); i++);
>  #ifdef __QUOTA_V2_PARANOIA
> -	if (i == V2_DQSTRINBLK) {
> +	if (i == dqstrinblk) {
>  		printk(KERN_ERR "VFS: find_free_dqentry(): Data block full but it shouldn't.\n");
>  		*err = -EIO;
>  		goto out_buf;
> @@ -318,7 +390,8 @@ static uint find_free_dqentry(struct dqu
>  		printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota data block %u.\n", blk);
>  		goto out_buf;
>  	}
> -	dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+sizeof(struct v2_disk_dqdbheader)+i*sizeof(struct v2_disk_dqblk);
> +	dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+
> +			sizeof(struct v2_disk_dqdbheader)+i*dqblksz;
>  	freedqbuf(buf);
>  	return blk;
>  out_buf:
> @@ -392,7 +465,10 @@ static int v2_write_dquot(struct dquot *
>  {
>  	int type = dquot->dq_type;
>  	ssize_t ret;
> -	struct v2_disk_dqblk ddquot, empty;
> +	union v2_disk_dqblk ddquot, empty;
> +	struct super_block *sb = dquot->dq_sb;
> +	uint rev = sb_dqopt(sb)->info[type].u.v2_i.dqi_revision;
> +	uint dqblksz = v2_dqblksz(rev);
>  
>  	/* dq_off is guarded by dqio_mutex */
>  	if (!dquot->dq_off)
> @@ -401,18 +477,19 @@ static int v2_write_dquot(struct dquot *
>  			return ret;
>  		}
>  	spin_lock(&dq_data_lock);
> -	mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id);
> +	mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id, rev);
>  	/* Argh... We may need to write structure full of zeroes but that would be
>  	 * treated as an empty place by the rest of the code. Format change would
>  	 * be definitely cleaner but the problems probably are not worth it */
> -	memset(&empty, 0, sizeof(struct v2_disk_dqblk));
> -	if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
> -		ddquot.dqb_itime = cpu_to_le64(1);
> +	memset(&empty, 0, dqblksz);
> +	if (!memcmp(&empty, &ddquot, dqblksz))
> +		DQF_PUT(&ddquot, rev, dqb_itime, 1);
>  	spin_unlock(&dq_data_lock);
> -	ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type,
> -	      (char *)&ddquot, sizeof(struct v2_disk_dqblk), dquot->dq_off);
> -	if (ret != sizeof(struct v2_disk_dqblk)) {
> -		printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", dquot->dq_sb->s_id);
> +	ret = sb->s_op->quota_write(sb, type,
> +	      (char *)&ddquot, dqblksz, dquot->dq_off);
> +	if (ret != dqblksz) {
> +		printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", 
> +			sb->s_id);
>  		if (ret >= 0)
>  			ret = -ENOSPC;
>  	}
> @@ -431,6 +508,7 @@ static int free_dqentry(struct dquot *dq
>  	struct v2_disk_dqdbheader *dh;
>  	dqbuf_t buf = getdqbuf();
>  	int ret = 0;
> +	uint rev = sb_dqopt(sb)->info[type].u.v2_i.dqi_revision;
>  
>  	if (!buf)
>  		return -ENOMEM;
> @@ -456,8 +534,8 @@ static int free_dqentry(struct dquot *dq
>  	}
>  	else {
>  		memset(buf+(dquot->dq_off & ((1 << V2_DQBLKSIZE_BITS)-1)), 0,
> -		  sizeof(struct v2_disk_dqblk));
> -		if (le16_to_cpu(dh->dqdh_entries) == V2_DQSTRINBLK-1) {
> +		  v2_dqblksz(rev));
> +		if (le16_to_cpu(dh->dqdh_entries) == v2_dqstrinblk(rev)-1) {
>  			/* Insert will write block itself */
>  			if ((ret = insert_free_dqentry(sb, type, buf, blk)) < 0) {
>  				printk(KERN_ERR "VFS: Can't insert quota data block (%u) to free entry list.\n", blk);
> @@ -535,27 +613,33 @@ static loff_t find_block_dqentry(struct
>  	dqbuf_t buf = getdqbuf();
>  	loff_t ret = 0;
>  	int i;
> -	struct v2_disk_dqblk *ddquot = GETENTRIES(buf);
> +	union v2_disk_dqblk *ddquot = GETENTRIES(buf);
> +	struct super_block *sb = dquot->dq_sb;
> +	int type = dquot->dq_type;
> +	uint rev = sb_dqopt(sb)->info[type].u.v2_i.dqi_revision;
> +	uint dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev);
>  
>  	if (!buf)
>  		return -ENOMEM;
> -	if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
> +
> +	ret = read_blk(sb, type, blk, buf);
> +	if (ret < 0) {
>  		printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
>  		goto out_buf;
>  	}
>  	if (dquot->dq_id)
> -		for (i = 0; i < V2_DQSTRINBLK &&
> -		     le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++);
> +		for (i = 0; i < dqstrinblk &&
> +		     DQF_GET(ddquot+i, rev, dqb_id) != dquot->dq_id; i++);
>  	else {	/* ID 0 as a bit more complicated searching... */
> -		struct v2_disk_dqblk fakedquot;
> +		union v2_disk_dqblk fakedquot;
>  
> -		memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
> -		for (i = 0; i < V2_DQSTRINBLK; i++)
> -			if (!le32_to_cpu(ddquot[i].dqb_id) &&
> -			    memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)))
> +		memset(&fakedquot, 0, dqblksz);
> +		for (i = 0; i < dqstrinblk; i++)
> +			if (!DQF_GET(ddquot+i, rev, dqb_id) &&
> +			    memcmp(&fakedquot, ddquot+i, dqblksz))
>  				break;
>  	}
> -	if (i == V2_DQSTRINBLK) {
> +	if (i == dqstrinblk) {
>  		printk(KERN_ERR "VFS: Quota for id %u referenced "
>  		  "but not present.\n", dquot->dq_id);
>  		ret = -EIO;
> @@ -563,7 +647,7 @@ static loff_t find_block_dqentry(struct
>  	}
>  	else
>  		ret = (blk << V2_DQBLKSIZE_BITS) + sizeof(struct
> -		  v2_disk_dqdbheader) + i * sizeof(struct v2_disk_dqblk);
> +		  v2_disk_dqdbheader) + i * dqblksz;
>  out_buf:
>  	freedqbuf(buf);
>  	return ret;
> @@ -605,12 +689,13 @@ static int v2_read_dquot(struct dquot *d
>  {
>  	int type = dquot->dq_type;
>  	loff_t offset;
> -	struct v2_disk_dqblk ddquot, empty;
> +	union v2_disk_dqblk ddquot, empty;
>  	int ret = 0;
> +	struct super_block *sb = dquot->dq_sb;
>  
>  #ifdef __QUOTA_V2_PARANOIA
>  	/* Invalidated quota? */
> -	if (!dquot->dq_sb || !sb_dqopt(dquot->dq_sb)->files[type]) {
> +	if (!sb || !sb_dqopt(sb)->files[type]) {
>  		printk(KERN_ERR "VFS: Quota invalidated while reading!\n");
>  		return -EIO;
>  	}
> @@ -626,25 +711,27 @@ static int v2_read_dquot(struct dquot *d
>  		ret = offset;
>  	}
>  	else {
> +		uint rev = sb_dqopt(sb)->info[type].u.v2_i.dqi_revision,
> +		     dqblksz = v2_dqblksz(rev);
>  		dquot->dq_off = offset;
> -		if ((ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
> -		    (char *)&ddquot, sizeof(struct v2_disk_dqblk), offset))
> -		    != sizeof(struct v2_disk_dqblk)) {
> +		ret = sb->s_op->quota_read(sb, type, (char *)&ddquot, 
> +					   dqblksz, offset);
> +		if (ret != dqblksz) {
>  			if (ret >= 0)
>  				ret = -EIO;
>  			printk(KERN_ERR "VFS: Error while reading quota "
>  			  "structure for id %u.\n", dquot->dq_id);
> -			memset(&ddquot, 0, sizeof(struct v2_disk_dqblk));
> +			memset(&ddquot, 0, dqblksz);
>  		}
>  		else {
>  			ret = 0;
>  			/* We need to escape back all-zero structure */
> -			memset(&empty, 0, sizeof(struct v2_disk_dqblk));
> -			empty.dqb_itime = cpu_to_le64(1);
> -			if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
> -				ddquot.dqb_itime = 0;
> +			memset(&empty, 0, dqblksz);
> +			DQF_PUT(&empty, rev, dqb_itime, 1);
> +			if (!memcmp(&empty, &ddquot, dqblksz))
> +				DQF_PUT(&ddquot, rev, dqb_itime, 0);
>  		}
> -		disk2memdqb(&dquot->dq_dqb, &ddquot);
> +		disk2memdqb(&dquot->dq_dqb, &ddquot, rev);
>  		if (!dquot->dq_dqb.dqb_bhardlimit &&
>  			!dquot->dq_dqb.dqb_bsoftlimit &&
>  			!dquot->dq_dqb.dqb_ihardlimit &&
> diff -rNpu quota.orig/include/linux/dqblk_v2.h quota/include/linux/dqblk_v2.h
> --- quota.orig/include/linux/dqblk_v2.h	2008-01-25 01:58:37.000000000 +0300
> +++ quota/include/linux/dqblk_v2.h	2008-03-08 22:27:02.000000000 +0300
> @@ -21,6 +21,7 @@ struct v2_mem_dqinfo {
>  	unsigned int dqi_blocks;
>  	unsigned int dqi_free_blk;
>  	unsigned int dqi_free_entry;
> +	unsigned int dqi_revision;
>  };
>  
>  #endif /* _LINUX_DQBLK_V2_H */
> diff -rNpu quota.orig/include/linux/quota.h quota/include/linux/quota.h
> --- quota.orig/include/linux/quota.h	2008-01-25 01:58:37.000000000 +0300
> +++ quota/include/linux/quota.h	2008-03-09 22:38:31.000000000 +0300
> @@ -181,12 +181,12 @@ extern spinlock_t dq_data_lock;
>   * Data for one user/group kept in memory
>   */
>  struct mem_dqblk {
> -	__u32 dqb_bhardlimit;	/* absolute limit on disk blks alloc */
> -	__u32 dqb_bsoftlimit;	/* preferred limit on disk blks */
> +	qsize_t dqb_bhardlimit;	/* absolute limit on disk blks alloc */
> +	qsize_t dqb_bsoftlimit;	/* preferred limit on disk blks */
>  	qsize_t dqb_curspace;	/* current used space */
> -	__u32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
> -	__u32 dqb_isoftlimit;	/* preferred inode limit */
> -	__u32 dqb_curinodes;	/* current # allocated inodes */
> +	qsize_t dqb_ihardlimit;	/* absolute limit on allocated inodes */
> +	qsize_t dqb_isoftlimit;	/* preferred inode limit */
> +	qsize_t dqb_curinodes;	/* current # allocated inodes */
>  	time_t dqb_btime;	/* time limit for excessive disk use */
>  	time_t dqb_itime;	/* time limit for excessive inode use */
>  };
> @@ -202,6 +202,10 @@ struct mem_dqinfo {
>  	unsigned long dqi_flags;
>  	unsigned int dqi_bgrace;
>  	unsigned int dqi_igrace;
> +	qsize_t dqi_maxbhardlimit;
> +	qsize_t dqi_maxbsoftlimit;
> +	qsize_t dqi_maxihardlimit;
> +	qsize_t dqi_maxisoftlimit;
>  	union {
>  		struct v1_mem_dqinfo v1_i;
>  		struct v2_mem_dqinfo v2_i;
> diff -rNpu quota.orig/include/linux/quotaio_v2.h quota/include/linux/quotaio_v2.h
> --- quota.orig/include/linux/quotaio_v2.h	2008-01-25 01:58:37.000000000 +0300
> +++ quota/include/linux/quotaio_v2.h	2008-03-09 20:34:42.000000000 +0300
> @@ -16,28 +16,51 @@
>  	0xd9c01927	/* GRPQUOTA */\
>  }
>  
> -#define V2_INITQVERSIONS {\
> +#define V2_INITQVERSIONS_R0 {\
>  	0,		/* USRQUOTA */\
>  	0		/* GRPQUOTA */\
>  }
>  
> +#define V2_INITQVERSIONS_R1 {\
> +	1,		/* USRQUOTA */\
> +	1		/* GRPQUOTA */\
> +}
> +
>  /*
>   * The following structure defines the format of the disk quota file
>   * (as it appears on disk) - the file is a radix tree whose leaves point
>   * to blocks of these structures.
>   */
> -struct v2_disk_dqblk {
> +struct v2_disk_dqblk_r0 {
>  	__le32 dqb_id;		/* id this quota applies to */
>  	__le32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
>  	__le32 dqb_isoftlimit;	/* preferred inode limit */
>  	__le32 dqb_curinodes;	/* current # allocated inodes */
> -	__le32 dqb_bhardlimit;	/* absolute limit on disk space (in QUOTABLOCK_SIZE) */
> -	__le32 dqb_bsoftlimit;	/* preferred limit on disk space (in QUOTABLOCK_SIZE) */
> +	__le32 dqb_bhardlimit;	/* absolute limit on disk space */
> +	__le32 dqb_bsoftlimit;	/* preferred limit on disk space */
> +	__le64 dqb_curspace;	/* current space occupied (in bytes) */
> +	__le64 dqb_btime;	/* time limit for excessive disk use */
> +	__le64 dqb_itime;	/* time limit for excessive inode use */
> +};
> +
> +struct v2_disk_dqblk_r1 {
> +	__le32 dqb_id;		/* id this quota applies to */
> +	__le32 dqb_padding;	/* padding field */
> +	__le64 dqb_ihardlimit;	/* absolute limit on allocated inodes */
> +	__le64 dqb_isoftlimit;	/* preferred inode limit */
> +	__le64 dqb_curinodes;	/* current # allocated inodes */
> +	__le64 dqb_bhardlimit;	/* absolute limit on disk space */
> +	__le64 dqb_bsoftlimit;	/* preferred limit on disk space */
>  	__le64 dqb_curspace;	/* current space occupied (in bytes) */
>  	__le64 dqb_btime;	/* time limit for excessive disk use */
>  	__le64 dqb_itime;	/* time limit for excessive inode use */
>  };
>  
> +union v2_disk_dqblk {
> +	struct v2_disk_dqblk_r0 disk_dqblk_r0;
> +	struct v2_disk_dqblk_r1 disk_dqblk_r1;
> +};
> +
>  /*
>   * Here are header structures as written on disk and their in-memory copies
>   */
> @@ -59,7 +82,7 @@ struct v2_disk_dqinfo {
>  
>  /*
>   *  Structure of header of block with quota structures. It is padded to 16 bytes so
> - *  there will be space for exactly 21 quota-entries in a block
> + *  there will be space for exactly 21 (r0) or 14 (r1) quota-entries in a block
>   */
>  struct v2_disk_dqdbheader {
>  	__le32 dqdh_next_free;	/* Number of next block with free entry */
> @@ -74,6 +97,5 @@ struct v2_disk_dqdbheader {
>  #define V2_DQBLKSIZE	(1 << V2_DQBLKSIZE_BITS)	/* Size of block with quota structures */
>  #define V2_DQTREEOFF	1		/* Offset of tree in file in blocks */
>  #define V2_DQTREEDEPTH	4		/* Depth of quota tree */
> -#define V2_DQSTRINBLK	((V2_DQBLKSIZE - sizeof(struct v2_disk_dqdbheader)) / sizeof(struct v2_disk_dqblk))	/* Number of entries in one blocks */
>  
>  #endif /* _LINUX_QUOTAIO_V2_H */
> 
> 
> On Thursday 06 March 2008 17:48:24 Jan Kara wrote:
> >   Hello,
> > 
> >   Sorry for not responding for a few days. I was busy with other things.
> > 
> > On Thu 06-03-08 16:41:11, Andrew Perepechko wrote:
> > > We are in need of large (above 4 TB) block quota limits, but it seems like XFS filesystem 
> > > (having its own quota implementation) is the only available fs that supports them. Currently
> > > ext3 supports up to 8 TB of data and forthcoming ext4 will support even more.
> > > 
> > > Linux kernel has two implementations of quota format modules:
> > > quota_v1 (with QFMT_VFS_OLD id)
> > > quota_v2 (with QFMT_VFS_V0 id)
> > > Either uses 32-bit data types to store quota limits on disk
> > > (see struct v1_disk_dqblk and struct v2_disk_dqblk). Block quota limits 
> > > are stored in 1kb units (QUOTABLOCK_SIZE constant) which gives
> > > the largest possible quota limit of (2^32-1)*2^10 bytes ~ 4 TB.
> > > 
> > > In-memory quota entries representation suffers from the same 4 TB 
> > > limitation (see struct mem_dqblk).
> > > 
> > > The patch below adds a separate quota_v3 module which deals with 64-bit data  to solve the problem
> > > (another possible approach is to merge the code into quota_v2 module to reuse some amount of the code - 
> > > this won't reuse a lot because there're too many references to disk_dqblk structures and dependent constants).
> > > 
> > > Could you comment on the patch and the idea behind it in general?
> >   Just from a quick look. There seem to be actually two separate changes:
> > 1) Change current formats so that they refuse to set quota above treshold they
> > are able to handle. That's fine a we should do that (in a separate patch,
> > please).
> > 
> > 2) Implement new format able to handle more that 4TB limits. In principle,
> > that is fine but vfsv0 format has actually been designed so that similar
> > changes can go mostly invisible for userspace (modulo some tools updates
> > etc.). Given that the format itself doesn't change that much, we definitely
> > do not need to introduce completely new quota format. I'd just increase the
> > version number. Also I'd like to avoid unnecessary code duplication. The
> > only thing that is really different are just the conversion routines from
> > disk to memory. So I'd just modify the code in fs/quota_v2.c so that it
> > supports both versions of the quota format - you need to parametrize macros
> > like GETENTRIES(), V2_DQSTRINBLK, ... (actually make inline functions of
> > them when we are changing it), probably make union of struct v2_disk_dqblk
> > including structures for both versions and change sizeof(struct
> > v2_disk_dqblk) to some function. But all this shouldn't be that hard to do
> > in a nice way...
> > 
> > 									Honza
> 
-- 
Jan Kara <jack@suse.cz>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] quota: 64-bit limits with vfs
  2008-03-10 16:26     ` Jan Kara
@ 2008-03-10 17:13       ` Andrew Perepechko
  2008-03-10 17:20         ` Jan Kara
  0 siblings, 1 reply; 21+ messages in thread
From: Andrew Perepechko @ 2008-03-10 17:13 UTC (permalink / raw)
  To: Jan Kara; +Cc: linux-fsdevel, Johann Lombardi, Zhiyong Landen tian,
	Alex Lyashkov

Hello, Jan.

I agree with your comments except for the one below.

Why do you think these macros don't do the right thing?
How can they fail? I can't agree with you that introducing
separate functions for id/time and duplicating code in
disk2memdqb and mem2diskdqb is nicer than using
several uniform macros (why aren't the latter nice, btw?).
Wasn't it you who told me that code duplication is no good? ;)

Thank you for your patience.
Andrew.

On Monday 10 March 2008 19:26:09 Jan Kara wrote:

> > +#define DQ2MQ(v) (sizeof(v) == sizeof(__u64) ? \
> > +		  (qsize_t)le64_to_cpu(v) : \
> > +		  (qsize_t)le32_to_cpu(v))
> > +
> > +#define MQ2DQ(v, newv) (sizeof(v) == sizeof(__u64) ? \
> > +			(v = cpu_to_le64((__u64)newv)) : \
> > +			(v = cpu_to_le32((__u32)newv)))
> > +
> > +#define DQF_GET(var, rev, field) (rev == 0 ? \
> > +		DQ2MQ((var)->disk_dqblk_r0.field) : \
> > +		DQ2MQ((var)->disk_dqblk_r1.field))
> > +
> > +#define DQF_PUT(var, rev, field, val) (rev == 0 ? \
> > +		MQ2DQ((var)->disk_dqblk_r0.field, val) : \
> > +		MQ2DQ((var)->disk_dqblk_r1.field, val))
>   Actually, these macros will do the right thing for dqb_id only by sheer
> luck and they won't work for dqb_curspace, dqb_itime and dqb_btime.
>   Please just get rid of them, they aren't very nice anyway. In disk2memdqb()
> and mem2diskdqb() just do
> 
>   if (rev == 0) {
> 	conversions..
>   }
>   else if (rev == 1) {
> 	conversions..
>   }
>   else {
> 	BUG();
>   }
> 
>   And for the fields dqb_id and dqb_itime, which we use in other places,
> introduce functions get_dqb_id() and get/set_dqb_itime().
> 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] quota: 64-bit limits with vfs
  2008-03-10 17:13       ` Andrew Perepechko
@ 2008-03-10 17:20         ` Jan Kara
  2008-03-14 13:08           ` Andrew Perepechko
  0 siblings, 1 reply; 21+ messages in thread
From: Jan Kara @ 2008-03-10 17:20 UTC (permalink / raw)
  To: Andrew Perepechko
  Cc: linux-fsdevel, Johann Lombardi, Zhiyong Landen tian,
	Alex Lyashkov

  Hello Andrew,

On Mon 10-03-08 20:13:10, Andrew Perepechko wrote:
> Why do you think these macros don't do the right thing?
> How can they fail? I can't agree with you that introducing
> separate functions for id/time and duplicating code in 
> disk2memdqblk and mem2diskdqblk is more nice than using
> several uniform macros(why aren't the latter nice, btw?). 
> Wasn't that you who told me that code duplication is no good? ;)
  Oops, sorry, I misread the macro DQ2MQ(). It will work fine.
Hmm, I still don't like the macro magic too much, but I guess it's good
enough :)

> On Monday 10 March 2008 19:26:09 Jan Kara wrote:
> 
> > > +#define DQ2MQ(v) (sizeof(v) == sizeof(__u64) ? \
> > > +		  (qsize_t)le64_to_cpu(v) : \
> > > +		  (qsize_t)le32_to_cpu(v))
> > > +
> > > +#define MQ2DQ(v, newv) (sizeof(v) == sizeof(__u64) ? \
> > > +			(v = cpu_to_le64((__u64)newv)) : \
> > > +			(v = cpu_to_le32((__u32)newv)))
> > > +
> > > +#define DQF_GET(var, rev, field) (rev == 0 ? \
> > > +		DQ2MQ((var)->disk_dqblk_r0.field) : \
> > > +		DQ2MQ((var)->disk_dqblk_r1.field))
> > > +
> > > +#define DQF_PUT(var, rev, field, val) (rev == 0 ? \
> > > +		MQ2DQ((var)->disk_dqblk_r0.field, val) : \
> > > +		MQ2DQ((var)->disk_dqblk_r1.field, val))
> >   Actually, these macros will do the right thing for dqb_id only by sheer
> > luck and they won't work for dqb_curspace, dqb_itime and dqb_btime.
> >   Please just get rid of them, they aren't very nice anyway. In disk2memdqb()
> > and mem2diskdqb() just do
> > 
> >   if (rev == 0) {
> > 	conversions..
> >   }
> >   else if (rev == 1) {
> > 	conversions..
> >   }
> >   else {
> > 	BUG();
> >   }
> > 
> >   And for fields dqb_id and dqb_itime we use at other places introduce
> > functions get_dqb_id() and get/set_dqb_itime().
> > 
								Honza

-- 
Jan Kara <jack@suse.cz>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] quota: 64-bit limits with vfs
  2008-03-10 17:20         ` Jan Kara
@ 2008-03-14 13:08           ` Andrew Perepechko
  2008-03-15  4:23             ` Andreas Dilger
  0 siblings, 1 reply; 21+ messages in thread
From: Andrew Perepechko @ 2008-03-14 13:08 UTC (permalink / raw)
  To: Jan Kara; +Cc: linux-fsdevel, Johann Lombardi, Zhiyong Landen tian,
	Alex Lyashkov

Introduce 64-bit quota limits support with QFMT_VFS_V0. 

This patch is incremental to quota-do-not-allow-setting-of-quota-limits-to-too-high-values.patch included in -mm tree.

Signed-off-by: Andrew Perepechko <andrew.perepechko@sun.com>

---

 fs/quota_v2.c              |  234 ++++++++++++++++++++++++++++++---------------
 include/linux/dqblk_v2.h   |    1
 include/linux/quota.h      |   10 -
 include/linux/quotaio_v2.h |   34 +++++-
 4 files changed, 192 insertions(+), 87 deletions(-)


diff -rNpu linux.2.6.24.3.old/fs/quota_v2.c linux-2.6.24.3/fs/quota_v2.c
--- linux.2.6.24.3.old/fs/quota_v2.c	2008-03-14 15:34:16.000000000 +0300
+++ linux-2.6.24.3/fs/quota_v2.c	2008-03-14 00:07:04.000000000 +0300
@@ -23,26 +23,61 @@ MODULE_LICENSE("GPL");
 typedef char *dqbuf_t;
 
 #define GETIDINDEX(id, depth) (((id) >> ((V2_DQTREEDEPTH-(depth)-1)*8)) & 0xff)
-#define GETENTRIES(buf) ((struct v2_disk_dqblk *)(((char *)buf)+sizeof(struct v2_disk_dqdbheader)))
+#define GETENTRIES(buf) ((union v2_disk_dqblk *)(((char *)buf) + \
+			 sizeof(struct v2_disk_dqdbheader)))
 
-/* Check whether given file is really vfsv0 quotafile */
-static int v2_check_quota_file(struct super_block *sb, int type)
+static inline uint v2_dqblksz(uint rev)
+{
+	uint sz = 0; /* make the compiler happy */
+
+	switch (rev) {
+	case 0:
+		sz = sizeof(struct v2_disk_dqblk_r0);
+		break;
+	case 1:
+		sz = sizeof(struct v2_disk_dqblk_r1);
+		break;
+	default:
+		BUG();
+	}
+
+	return sz;
+}
+
+/* Number of quota entries in a block */
+static inline int v2_dqstrinblk(uint rev)
+{
+	return (V2_DQBLKSIZE-sizeof(struct v2_disk_dqdbheader))/v2_dqblksz(rev);
+}
+
+/* Get revision of a quota file, -1 if it does not look a quota file */
+static int v2_quota_file_revision(struct super_block *sb, int type)
 {
 	struct v2_disk_dqheader dqhead;
 	ssize_t size;
 	static const uint quota_magics[] = V2_INITQMAGICS;
-	static const uint quota_versions[] = V2_INITQVERSIONS;
+	static const uint quota_versions_r0[] = V2_INITQVERSIONS_R0;
+	static const uint quota_versions_r1[] = V2_INITQVERSIONS_R1;
  
 	size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
 	if (size != sizeof(struct v2_disk_dqheader)) {
 		printk("quota_v2: failed read expected=%zd got=%zd\n",
 			sizeof(struct v2_disk_dqheader), size);
-		return 0;
+		return -1;
 	}
-	if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
-	    le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
-		return 0;
-	return 1;
+	if (le32_to_cpu(dqhead.dqh_magic) == quota_magics[type]) {
+		if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r0[type])
+			return 0;
+		if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r1[type])
+			return 1;
+	}
+	return -1;
+}
+
+/* Check whether given file is really vfsv0 quotafile */
+static int v2_check_quota_file(struct super_block *sb, int type)
+{
+	return v2_quota_file_revision(sb, type) != -1;
 }
 
 /* Read information header from quota file */
@@ -51,6 +86,10 @@ static int v2_read_file_info(struct supe
 	struct v2_disk_dqinfo dinfo;
 	struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
 	ssize_t size;
+	int rev;
+
+	rev = v2_quota_file_revision(sb, type);
+	BUG_ON(rev < 0);
 
 	size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
 	       sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
@@ -59,15 +98,22 @@ static int v2_read_file_info(struct supe
 			sb->s_id);
 		return -1;
 	}
-	/* limits are stored as unsigned 32-bit data */
-	info->dqi_maxblimit = 0xffffffff;
-	info->dqi_maxilimit = 0xffffffff;
 	info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
 	info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
 	info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
 	info->u.v2_i.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
 	info->u.v2_i.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
 	info->u.v2_i.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
+
+	info->u.v2_i.dqi_revision = rev;
+	if (rev == 0) {
+		info->dqi_maxblimit = 0xffffffffULL;
+		info->dqi_maxilimit = 0xffffffffULL;
+	} else {
+		info->dqi_maxblimit = 0xffffffffffffffffULL;
+		info->dqi_maxilimit = 0xffffffffffffffffULL;
+	}
+
 	return 0;
 }
 
@@ -97,29 +143,47 @@ static int v2_write_file_info(struct sup
 	return 0;
 }
 
-static void disk2memdqb(struct mem_dqblk *m, struct v2_disk_dqblk *d)
-{
-	m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit);
-	m->dqb_isoftlimit = le32_to_cpu(d->dqb_isoftlimit);
-	m->dqb_curinodes = le32_to_cpu(d->dqb_curinodes);
-	m->dqb_itime = le64_to_cpu(d->dqb_itime);
-	m->dqb_bhardlimit = le32_to_cpu(d->dqb_bhardlimit);
-	m->dqb_bsoftlimit = le32_to_cpu(d->dqb_bsoftlimit);
-	m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
-	m->dqb_btime = le64_to_cpu(d->dqb_btime);
-}
-
-static void mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id)
-{
-	d->dqb_ihardlimit = cpu_to_le32((u32)m->dqb_ihardlimit);
-	d->dqb_isoftlimit = cpu_to_le32((u32)m->dqb_isoftlimit);
-	d->dqb_curinodes = cpu_to_le32((u32)m->dqb_curinodes);
-	d->dqb_itime = cpu_to_le64(m->dqb_itime);
-	d->dqb_bhardlimit = cpu_to_le32((u32)m->dqb_bhardlimit);
-	d->dqb_bsoftlimit = cpu_to_le32((u32)m->dqb_bsoftlimit);
-	d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
-	d->dqb_btime = cpu_to_le64(m->dqb_btime);
-	d->dqb_id = cpu_to_le32(id);
+#define DQ2MQ(v) (sizeof(v) == sizeof(__u64) ? \
+		  (qsize_t)le64_to_cpu(v) : \
+		  (qsize_t)le32_to_cpu(v))
+
+#define MQ2DQ(v, newv) (sizeof(v) == sizeof(__u64) ? \
+			(v = cpu_to_le64((__u64)newv)) : \
+			(v = cpu_to_le32((__u32)newv)))
+
+#define DQF_GET(var, rev, field) (rev == 0 ? \
+		DQ2MQ((var)->disk_dqblk_r0.field) : \
+		DQ2MQ((var)->disk_dqblk_r1.field))
+
+#define DQF_PUT(var, rev, field, val) (rev == 0 ? \
+		MQ2DQ((var)->disk_dqblk_r0.field, val) : \
+		MQ2DQ((var)->disk_dqblk_r1.field, val))
+
+void disk2memdqb(struct mem_dqblk *m, union v2_disk_dqblk *d,
+		 uint rev)
+{
+	m->dqb_ihardlimit = DQF_GET(d, rev, dqb_ihardlimit);
+	m->dqb_isoftlimit = DQF_GET(d, rev, dqb_isoftlimit);
+	m->dqb_curinodes = DQF_GET(d, rev, dqb_curinodes);
+	m->dqb_itime = DQF_GET(d, rev, dqb_itime);
+	m->dqb_bhardlimit = DQF_GET(d, rev, dqb_bhardlimit);
+	m->dqb_bsoftlimit = DQF_GET(d, rev, dqb_bsoftlimit);
+	m->dqb_curspace = DQF_GET(d, rev, dqb_curspace);
+	m->dqb_btime = DQF_GET(d, rev, dqb_btime);
+}
+
+static void mem2diskdqb(union v2_disk_dqblk *d, struct mem_dqblk *m,
+			qid_t id, uint rev)
+{
+	DQF_PUT(d, rev, dqb_ihardlimit, m->dqb_ihardlimit);
+	DQF_PUT(d, rev, dqb_isoftlimit, m->dqb_isoftlimit);
+	DQF_PUT(d, rev, dqb_curinodes, m->dqb_curinodes);
+	DQF_PUT(d, rev, dqb_itime, m->dqb_itime);
+	DQF_PUT(d, rev, dqb_bhardlimit, m->dqb_bhardlimit);
+	DQF_PUT(d, rev, dqb_bsoftlimit, m->dqb_bsoftlimit);
+	DQF_PUT(d, rev, dqb_curspace, m->dqb_curspace);
+	DQF_PUT(d, rev, dqb_btime, m->dqb_btime);
+	DQF_PUT(d, rev, dqb_id, id);
 }
 
 static dqbuf_t getdqbuf(void)
@@ -271,10 +335,11 @@ static uint find_free_dqentry(struct dqu
 {
 	struct super_block *sb = dquot->dq_sb;
 	struct mem_dqinfo *info = sb_dqopt(sb)->info+dquot->dq_type;
-	uint blk, i;
+	uint blk, i, rev = info->u.v2_i.dqi_revision, 
+	     dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev);
 	struct v2_disk_dqdbheader *dh;
-	struct v2_disk_dqblk *ddquot;
-	struct v2_disk_dqblk fakedquot;
+	union v2_disk_dqblk *ddquot;
+	union v2_disk_dqblk fakedquot;
 	dqbuf_t buf;
 
 	*err = 0;
@@ -301,17 +366,19 @@ static uint find_free_dqentry(struct dqu
 		info->u.v2_i.dqi_free_entry = blk;
 		mark_info_dirty(sb, dquot->dq_type);
 	}
-	if (le16_to_cpu(dh->dqdh_entries)+1 >= V2_DQSTRINBLK)	/* Block will be full? */
+	/* Block will be full? */
+	if (le16_to_cpu(dh->dqdh_entries)+1 >= dqstrinblk)
 		if ((*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) {
 			printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
 			goto out_buf;
 		}
 	dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)+1);
-	memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
+	memset(&fakedquot, 0, dqblksz);
 	/* Find free structure in block */
-	for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++);
+	for (i = 0; i < dqstrinblk && memcmp(&fakedquot,
+			ddquot+i, dqblksz); i++);
 #ifdef __QUOTA_V2_PARANOIA
-	if (i == V2_DQSTRINBLK) {
+	if (i == dqstrinblk) {
 		printk(KERN_ERR "VFS: find_free_dqentry(): Data block full but it shouldn't.\n");
 		*err = -EIO;
 		goto out_buf;
@@ -321,7 +388,8 @@ static uint find_free_dqentry(struct dqu
 		printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota data block %u.\n", blk);
 		goto out_buf;
 	}
-	dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+sizeof(struct v2_disk_dqdbheader)+i*sizeof(struct v2_disk_dqblk);
+	dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+
+			sizeof(struct v2_disk_dqdbheader)+i*dqblksz;
 	freedqbuf(buf);
 	return blk;
 out_buf:
@@ -395,7 +463,10 @@ static int v2_write_dquot(struct dquot *
 {
 	int type = dquot->dq_type;
 	ssize_t ret;
-	struct v2_disk_dqblk ddquot, empty;
+	union v2_disk_dqblk ddquot, empty;
+	struct super_block *sb = dquot->dq_sb;
+	uint rev = sb_dqopt(sb)->info[type].u.v2_i.dqi_revision;
+	uint dqblksz = v2_dqblksz(rev);
 
 	/* dq_off is guarded by dqio_mutex */
 	if (!dquot->dq_off)
@@ -404,18 +475,19 @@ static int v2_write_dquot(struct dquot *
 			return ret;
 		}
 	spin_lock(&dq_data_lock);
-	mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id);
+	mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id, rev);
 	/* Argh... We may need to write structure full of zeroes but that would be
 	 * treated as an empty place by the rest of the code. Format change would
 	 * be definitely cleaner but the problems probably are not worth it */
-	memset(&empty, 0, sizeof(struct v2_disk_dqblk));
-	if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
-		ddquot.dqb_itime = cpu_to_le64(1);
+	memset(&empty, 0, dqblksz);
+	if (!memcmp(&empty, &ddquot, dqblksz))
+		DQF_PUT(&ddquot, rev, dqb_itime, 1);
 	spin_unlock(&dq_data_lock);
-	ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type,
-	      (char *)&ddquot, sizeof(struct v2_disk_dqblk), dquot->dq_off);
-	if (ret != sizeof(struct v2_disk_dqblk)) {
-		printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", dquot->dq_sb->s_id);
+	ret = sb->s_op->quota_write(sb, type,
+	      (char *)&ddquot, dqblksz, dquot->dq_off);
+	if (ret != dqblksz) {
+		printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", 
+			sb->s_id);
 		if (ret >= 0)
 			ret = -ENOSPC;
 	}
@@ -434,6 +506,7 @@ static int free_dqentry(struct dquot *dq
 	struct v2_disk_dqdbheader *dh;
 	dqbuf_t buf = getdqbuf();
 	int ret = 0;
+	uint rev = sb_dqopt(sb)->info[type].u.v2_i.dqi_revision;
 
 	if (!buf)
 		return -ENOMEM;
@@ -459,8 +532,8 @@ static int free_dqentry(struct dquot *dq
 	}
 	else {
 		memset(buf+(dquot->dq_off & ((1 << V2_DQBLKSIZE_BITS)-1)), 0,
-		  sizeof(struct v2_disk_dqblk));
-		if (le16_to_cpu(dh->dqdh_entries) == V2_DQSTRINBLK-1) {
+		  v2_dqblksz(rev));
+		if (le16_to_cpu(dh->dqdh_entries) == v2_dqstrinblk(rev)-1) {
 			/* Insert will write block itself */
 			if ((ret = insert_free_dqentry(sb, type, buf, blk)) < 0) {
 				printk(KERN_ERR "VFS: Can't insert quota data block (%u) to free entry list.\n", blk);
@@ -538,27 +611,33 @@ static loff_t find_block_dqentry(struct
 	dqbuf_t buf = getdqbuf();
 	loff_t ret = 0;
 	int i;
-	struct v2_disk_dqblk *ddquot = GETENTRIES(buf);
+	union v2_disk_dqblk *ddquot = GETENTRIES(buf);
+	struct super_block *sb = dquot->dq_sb;
+	int type = dquot->dq_type;
+	uint rev = sb_dqopt(sb)->info[type].u.v2_i.dqi_revision;
+	uint dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev);
 
 	if (!buf)
 		return -ENOMEM;
-	if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
+
+	ret = read_blk(sb, type, blk, buf);
+	if (ret < 0) {
 		printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
 		goto out_buf;
 	}
 	if (dquot->dq_id)
-		for (i = 0; i < V2_DQSTRINBLK &&
-		     le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++);
+		for (i = 0; i < dqstrinblk &&
+		     DQF_GET(ddquot+i, rev, dqb_id) != dquot->dq_id; i++);
 	else {	/* ID 0 as a bit more complicated searching... */
-		struct v2_disk_dqblk fakedquot;
+		union v2_disk_dqblk fakedquot;
 
-		memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
-		for (i = 0; i < V2_DQSTRINBLK; i++)
-			if (!le32_to_cpu(ddquot[i].dqb_id) &&
-			    memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)))
+		memset(&fakedquot, 0, dqblksz);
+		for (i = 0; i < dqstrinblk; i++)
+			if (!DQF_GET(ddquot+i, rev, dqb_id) &&
+			    memcmp(&fakedquot, ddquot+i, dqblksz))
 				break;
 	}
-	if (i == V2_DQSTRINBLK) {
+	if (i == dqstrinblk) {
 		printk(KERN_ERR "VFS: Quota for id %u referenced "
 		  "but not present.\n", dquot->dq_id);
 		ret = -EIO;
@@ -566,7 +645,7 @@ static loff_t find_block_dqentry(struct
 	}
 	else
 		ret = (blk << V2_DQBLKSIZE_BITS) + sizeof(struct
-		  v2_disk_dqdbheader) + i * sizeof(struct v2_disk_dqblk);
+		  v2_disk_dqdbheader) + i * dqblksz;
 out_buf:
 	freedqbuf(buf);
 	return ret;
@@ -608,12 +687,13 @@ static int v2_read_dquot(struct dquot *d
 {
 	int type = dquot->dq_type;
 	loff_t offset;
-	struct v2_disk_dqblk ddquot, empty;
+	union v2_disk_dqblk ddquot, empty;
 	int ret = 0;
+	struct super_block *sb = dquot->dq_sb;
 
 #ifdef __QUOTA_V2_PARANOIA
 	/* Invalidated quota? */
-	if (!dquot->dq_sb || !sb_dqopt(dquot->dq_sb)->files[type]) {
+	if (!sb || !sb_dqopt(sb)->files[type]) {
 		printk(KERN_ERR "VFS: Quota invalidated while reading!\n");
 		return -EIO;
 	}
@@ -629,25 +709,27 @@ static int v2_read_dquot(struct dquot *d
 		ret = offset;
 	}
 	else {
+		uint rev = sb_dqopt(sb)->info[type].u.v2_i.dqi_revision,
+		     dqblksz = v2_dqblksz(rev);
 		dquot->dq_off = offset;
-		if ((ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
-		    (char *)&ddquot, sizeof(struct v2_disk_dqblk), offset))
-		    != sizeof(struct v2_disk_dqblk)) {
+		ret = sb->s_op->quota_read(sb, type, (char *)&ddquot, 
+					   dqblksz, offset);
+		if (ret != dqblksz) {
 			if (ret >= 0)
 				ret = -EIO;
 			printk(KERN_ERR "VFS: Error while reading quota "
 			  "structure for id %u.\n", dquot->dq_id);
-			memset(&ddquot, 0, sizeof(struct v2_disk_dqblk));
+			memset(&ddquot, 0, dqblksz);
 		}
 		else {
 			ret = 0;
 			/* We need to escape back all-zero structure */
-			memset(&empty, 0, sizeof(struct v2_disk_dqblk));
-			empty.dqb_itime = cpu_to_le64(1);
-			if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
-				ddquot.dqb_itime = 0;
+			memset(&empty, 0, dqblksz);
+			DQF_PUT(&empty, rev, dqb_itime, 1);
+			if (!memcmp(&empty, &ddquot, dqblksz))
+				DQF_PUT(&ddquot, rev, dqb_itime, 0);
 		}
-		disk2memdqb(&dquot->dq_dqb, &ddquot);
+		disk2memdqb(&dquot->dq_dqb, &ddquot, rev);
 		if (!dquot->dq_dqb.dqb_bhardlimit &&
 			!dquot->dq_dqb.dqb_bsoftlimit &&
 			!dquot->dq_dqb.dqb_ihardlimit &&
diff -rNpu linux.2.6.24.3.old/include/linux/dqblk_v2.h linux-2.6.24.3/include/linux/dqblk_v2.h
--- linux.2.6.24.3.old/include/linux/dqblk_v2.h	2008-02-26 03:20:20.000000000 +0300
+++ linux-2.6.24.3/include/linux/dqblk_v2.h	2008-03-13 22:01:48.000000000 +0300
@@ -21,6 +21,7 @@ struct v2_mem_dqinfo {
 	unsigned int dqi_blocks;
 	unsigned int dqi_free_blk;
 	unsigned int dqi_free_entry;
+	unsigned int dqi_revision;
 };
 
 #endif /* _LINUX_DQBLK_V2_H */
diff -rNpu linux.2.6.24.3.old/include/linux/quota.h linux-2.6.24.3/include/linux/quota.h
--- linux.2.6.24.3.old/include/linux/quota.h	2008-03-14 15:34:16.000000000 +0300
+++ linux-2.6.24.3/include/linux/quota.h	2008-03-13 22:01:48.000000000 +0300
@@ -181,12 +181,12 @@ extern spinlock_t dq_data_lock;
  * Data for one user/group kept in memory
  */
 struct mem_dqblk {
-	__u32 dqb_bhardlimit;	/* absolute limit on disk blks alloc */
-	__u32 dqb_bsoftlimit;	/* preferred limit on disk blks */
+	qsize_t dqb_bhardlimit;	/* absolute limit on disk blks alloc */
+	qsize_t dqb_bsoftlimit;	/* preferred limit on disk blks */
 	qsize_t dqb_curspace;	/* current used space */
-	__u32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
-	__u32 dqb_isoftlimit;	/* preferred inode limit */
-	__u32 dqb_curinodes;	/* current # allocated inodes */
+	qsize_t dqb_ihardlimit;	/* absolute limit on allocated inodes */
+	qsize_t dqb_isoftlimit;	/* preferred inode limit */
+	qsize_t dqb_curinodes;	/* current # allocated inodes */
 	time_t dqb_btime;	/* time limit for excessive disk use */
 	time_t dqb_itime;	/* time limit for excessive inode use */
 };
diff -rNpu linux.2.6.24.3.old/include/linux/quotaio_v2.h linux-2.6.24.3/include/linux/quotaio_v2.h
--- linux.2.6.24.3.old/include/linux/quotaio_v2.h	2008-02-26 03:20:20.000000000 +0300
+++ linux-2.6.24.3/include/linux/quotaio_v2.h	2008-03-13 22:01:48.000000000 +0300
@@ -16,28 +16,51 @@
 	0xd9c01927	/* GRPQUOTA */\
 }
 
-#define V2_INITQVERSIONS {\
+#define V2_INITQVERSIONS_R0 {\
 	0,		/* USRQUOTA */\
 	0		/* GRPQUOTA */\
 }
 
+#define V2_INITQVERSIONS_R1 {\
+	1,		/* USRQUOTA */\
+	1		/* GRPQUOTA */\
+}
+
 /*
  * The following structure defines the format of the disk quota file
  * (as it appears on disk) - the file is a radix tree whose leaves point
  * to blocks of these structures.
  */
-struct v2_disk_dqblk {
+struct v2_disk_dqblk_r0 {
 	__le32 dqb_id;		/* id this quota applies to */
 	__le32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
 	__le32 dqb_isoftlimit;	/* preferred inode limit */
 	__le32 dqb_curinodes;	/* current # allocated inodes */
-	__le32 dqb_bhardlimit;	/* absolute limit on disk space (in QUOTABLOCK_SIZE) */
-	__le32 dqb_bsoftlimit;	/* preferred limit on disk space (in QUOTABLOCK_SIZE) */
+	__le32 dqb_bhardlimit;	/* absolute limit on disk space */
+	__le32 dqb_bsoftlimit;	/* preferred limit on disk space */
+	__le64 dqb_curspace;	/* current space occupied (in bytes) */
+	__le64 dqb_btime;	/* time limit for excessive disk use */
+	__le64 dqb_itime;	/* time limit for excessive inode use */
+};
+
+struct v2_disk_dqblk_r1 {
+	__le32 dqb_id;		/* id this quota applies to */
+	__le32 dqb_padding;	/* padding field */
+	__le64 dqb_ihardlimit;	/* absolute limit on allocated inodes */
+	__le64 dqb_isoftlimit;	/* preferred inode limit */
+	__le64 dqb_curinodes;	/* current # allocated inodes */
+	__le64 dqb_bhardlimit;	/* absolute limit on disk space */
+	__le64 dqb_bsoftlimit;	/* preferred limit on disk space */
 	__le64 dqb_curspace;	/* current space occupied (in bytes) */
 	__le64 dqb_btime;	/* time limit for excessive disk use */
 	__le64 dqb_itime;	/* time limit for excessive inode use */
 };
 
+union v2_disk_dqblk {
+	struct v2_disk_dqblk_r0 disk_dqblk_r0;
+	struct v2_disk_dqblk_r1 disk_dqblk_r1;
+};
+
 /*
  * Here are header structures as written on disk and their in-memory copies
  */
@@ -59,7 +82,7 @@ struct v2_disk_dqinfo {
 
 /*
  *  Structure of header of block with quota structures. It is padded to 16 bytes so
- *  there will be space for exactly 21 quota-entries in a block
+ *  there will be space for exactly 21 (r0) or 14 (r1) quota-entries in a block
  */
 struct v2_disk_dqdbheader {
 	__le32 dqdh_next_free;	/* Number of next block with free entry */
@@ -74,6 +97,5 @@ struct v2_disk_dqdbheader {
 #define V2_DQBLKSIZE	(1 << V2_DQBLKSIZE_BITS)	/* Size of block with quota structures */
 #define V2_DQTREEOFF	1		/* Offset of tree in file in blocks */
 #define V2_DQTREEDEPTH	4		/* Depth of quota tree */
-#define V2_DQSTRINBLK	((V2_DQBLKSIZE - sizeof(struct v2_disk_dqdbheader)) / sizeof(struct v2_disk_dqblk))	/* Number of entries in one blocks */
 
 #endif /* _LINUX_QUOTAIO_V2_H */

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] quota: 64-bit limits with vfs
  2008-03-14 13:08           ` Andrew Perepechko
@ 2008-03-15  4:23             ` Andreas Dilger
  2008-03-15 13:24               ` Andrew Perepechko
  0 siblings, 1 reply; 21+ messages in thread
From: Andreas Dilger @ 2008-03-15  4:23 UTC (permalink / raw)
  To: Andrew Perepechko
  Cc: Jan Kara, linux-fsdevel, Johann Lombardi, Zhiyong Landen tian,
	Alex Lyashkov

On Mar 14, 2008  16:08 +0300, Andrew Perepechko wrote:
> @@ -395,7 +463,10 @@ static int v2_write_dquot(struct dquot *
>  {
>  	int type = dquot->dq_type;
>  	ssize_t ret;
> -	struct v2_disk_dqblk ddquot, empty;
> +	union v2_disk_dqblk ddquot, empty;
> +	struct super_block *sb = dquot->dq_sb;
> +	uint rev = sb_dqopt(sb)->info[type].u.v2_i.dqi_revision;
> +	uint dqblksz = v2_dqblksz(rev);

In a few places you add new on-stack variables like "sb", but they aren't
used more than 1 or 2 times.  While it makes the code a tiny bit clearer
(though not largely so for "sb" because it is only dquot->dq_sb) it does
increase the stack usage, and that is never a good idea.

> +		union v2_disk_dqblk fakedquot;
>  
> +		memset(&fakedquot, 0, dqblksz);
> +		for (i = 0; i < dqstrinblk; i++)
> +			if (!DQF_GET(ddquot+i, rev, dqb_id) &&
> +			    memcmp(&fakedquot, ddquot+i, dqblksz))

Hmm, allocating "fakedquot" on the stack just to compare it to zero
doesn't seem like a good use of space.  What about doing the memcmp()
against page_address(ZERO_PAGE(0))?  It might be nice to have a permanent
mapping of ZERO_PAGE(0) like void *zero_buffer that can be used for this.

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] quota: 64-bit limits with vfs
  2008-03-15  4:23             ` Andreas Dilger
@ 2008-03-15 13:24               ` Andrew Perepechko
  2008-03-15 13:32                 ` Andrew Perepechko
  2008-03-15 14:45                 ` Andreas Dilger
  0 siblings, 2 replies; 21+ messages in thread
From: Andrew Perepechko @ 2008-03-15 13:24 UTC (permalink / raw)
  To: Andreas Dilger
  Cc: Jan Kara, linux-fsdevel, Johann Lombardi, Zhiyong Landen tian,
	Alex Lyashkov

Hello, Andreas

On Saturday 15 March 2008 07:23:16 you wrote:
> On Mar 14, 2008  16:08 +0300, Andrew Perepechko wrote:
> > @@ -395,7 +463,10 @@ static int v2_write_dquot(struct dquot *
> >  {
> >  	int type = dquot->dq_type;
> >  	ssize_t ret;
> > -	struct v2_disk_dqblk ddquot, empty;
> > +	union v2_disk_dqblk ddquot, empty;
> > +	struct super_block *sb = dquot->dq_sb;
> > +	uint rev = sb_dqopt(sb)->info[type].u.v2_i.dqi_revision;
> > +	uint dqblksz = v2_dqblksz(rev);
> 
> In a few places you add new on-stack variables like "sb", but they aren't
> used more than 1 or 2 times.  While it makes the code a tiny bit clearer
> (though not largely so for "sb" because it is only dquot->dq_sb) it does
> increase the stack usage, and that is never a good idea.

I agree. It probably makes sense to roll back the sb change, though this may
lead to additional line breaks.

> 
> > +		union v2_disk_dqblk fakedquot;
> >  
> > +		memset(&fakedquot, 0, dqblksz);
> > +		for (i = 0; i < dqstrinblk; i++)
> > +			if (!DQF_GET(ddquot+i, rev, dqb_id) &&
> > +			    memcmp(&fakedquot, ddquot+i, dqblksz))
> 
> Hmm, allocating "fakedquot" on the stack just to compare it to zero
> doesn't seem like a good use of space.  What about doing the memcmp()
> against page_address(ZERO_PAGE(0))?  It might be nice to have a permanent
> mapping of ZERO_PAGE(0) like void *zero_buffer that can be used for this.
> 

What do you think about something like this?

--- quota_v2.c.saved	2008-03-14 00:07:04.000000000 +0300
+++ quota_v2.c	2008-03-15 16:17:04.000000000 +0300
@@ -26,6 +26,12 @@ typedef char *dqbuf_t;
 #define GETENTRIES(buf) ((union v2_disk_dqblk *)(((char *)buf) + \
 			 sizeof(struct v2_disk_dqdbheader)))
 
+static union v2_disk_dqblk emptydquot;
+static union v2_disk_dqblk fakedquot[2] = {
+	{.disk_dqblk_r0 = {.dqb_itime = __constant_cpu_to_le64(1LLU)}},
+	{.disk_dqblk_r1 = {.dqb_itime = __constant_cpu_to_le64(1LLU)}}
+};
+
 static inline uint v2_dqblksz(uint rev)
 {
 	uint sz = 0; /* make the compiler happy */
@@ -339,7 +345,6 @@ static uint find_free_dqentry(struct dqu
 	     dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev);
 	struct v2_disk_dqdbheader *dh;
 	union v2_disk_dqblk *ddquot;
-	union v2_disk_dqblk fakedquot;
 	dqbuf_t buf;
 
 	*err = 0;
@@ -373,9 +378,8 @@ static uint find_free_dqentry(struct dqu
 			goto out_buf;
 		}
 	dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)+1);
-	memset(&fakedquot, 0, dqblksz);
 	/* Find free structure in block */
-	for (i = 0; i < dqstrinblk && memcmp(&fakedquot,
+	for (i = 0; i < dqstrinblk && memcmp(&emptydquot,
 			ddquot+i, dqblksz); i++);
 #ifdef __QUOTA_V2_PARANOIA
 	if (i == dqstrinblk) {
@@ -463,7 +467,7 @@ static int v2_write_dquot(struct dquot *
 {
 	int type = dquot->dq_type;
 	ssize_t ret;
-	union v2_disk_dqblk ddquot, empty;
+	union v2_disk_dqblk ddquot;
 	struct super_block *sb = dquot->dq_sb;
 	uint rev = sb_dqopt(sb)->info[type].u.v2_i.dqi_revision;
 	uint dqblksz = v2_dqblksz(rev);
@@ -479,8 +483,7 @@ static int v2_write_dquot(struct dquot *
 	/* Argh... We may need to write structure full of zeroes but that would be
 	 * treated as an empty place by the rest of the code. Format change would
 	 * be definitely cleaner but the problems probably are not worth it */
-	memset(&empty, 0, dqblksz);
-	if (!memcmp(&empty, &ddquot, dqblksz))
+	if (!memcmp(&emptydquot, &ddquot, dqblksz))
 		DQF_PUT(&ddquot, rev, dqb_itime, 1);
 	spin_unlock(&dq_data_lock);
 	ret = sb->s_op->quota_write(sb, type,
@@ -629,12 +632,9 @@ static loff_t find_block_dqentry(struct
 		for (i = 0; i < dqstrinblk &&
 		     DQF_GET(ddquot+i, rev, dqb_id) != dquot->dq_id; i++);
 	else {	/* ID 0 as a bit more complicated searching... */
-		union v2_disk_dqblk fakedquot;
-
-		memset(&fakedquot, 0, dqblksz);
 		for (i = 0; i < dqstrinblk; i++)
 			if (!DQF_GET(ddquot+i, rev, dqb_id) &&
-			    memcmp(&fakedquot, ddquot+i, dqblksz))
+			    memcmp(&emptydquot, ddquot+i, dqblksz))
 				break;
 	}
 	if (i == dqstrinblk) {
@@ -687,7 +687,7 @@ static int v2_read_dquot(struct dquot *d
 {
 	int type = dquot->dq_type;
 	loff_t offset;
-	union v2_disk_dqblk ddquot, empty;
+	union v2_disk_dqblk ddquot;
 	int ret = 0;
 	struct super_block *sb = dquot->dq_sb;
 
@@ -724,9 +724,7 @@ static int v2_read_dquot(struct dquot *d
 		else {
 			ret = 0;
 			/* We need to escape back all-zero structure */
-			memset(&empty, 0, dqblksz);
-			DQF_PUT(&empty, rev, dqb_itime, 1);
-			if (!memcmp(&empty, &ddquot, dqblksz))
+			if (!memcmp(&fakedquot[rev], &ddquot, dqblksz))
 				DQF_PUT(&ddquot, rev, dqb_itime, 0);
 		}
 		disk2memdqb(&dquot->dq_dqb, &ddquot, rev);

Cheers.
Andrew.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] quota: 64-bit limits with vfs
  2008-03-15 13:24               ` Andrew Perepechko
@ 2008-03-15 13:32                 ` Andrew Perepechko
  2008-03-15 14:45                 ` Andreas Dilger
  1 sibling, 0 replies; 21+ messages in thread
From: Andrew Perepechko @ 2008-03-15 13:32 UTC (permalink / raw)
  To: Andreas Dilger
  Cc: Jan Kara, linux-fsdevel, Johann Lombardi, Zhiyong Landen tian,
	Alex Lyashkov

> +static union v2_disk_dqblk emptydquot;
> +static union v2_disk_dqblk fakedquot[2] = {
> +	{.disk_dqblk_r0 = {.dqb_itime = __constant_cpu_to_le64(1LLU)}},
> +	{.disk_dqblk_r1 = {.dqb_itime = __constant_cpu_to_le64(1LLU)}}
> +};
> +

static const, indeed.

Cheers.
Andrew.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] quota: 64-bit limits with vfs
  2008-03-15 13:24               ` Andrew Perepechko
  2008-03-15 13:32                 ` Andrew Perepechko
@ 2008-03-15 14:45                 ` Andreas Dilger
  2008-03-15 18:58                   ` [RFC] quota: 64-bit limits with vfs, updated Andrew Perepechko
  1 sibling, 1 reply; 21+ messages in thread
From: Andreas Dilger @ 2008-03-15 14:45 UTC (permalink / raw)
  To: Andrew Perepechko
  Cc: Jan Kara, linux-fsdevel, Johann Lombardi, Zhiyong Landen tian,
	Alex Lyashkov

On Mar 15, 2008  16:24 +0300, Andrew Perepechko wrote:
> On Saturday 15 March 2008 07:23:16 Andreas Dilger wrote:
> > In a few places you add new on-stack variables like "sb", but they aren't
> > used more than 1 or 2 times.  While it makes the code a tiny bit clearer
> > (though not largely so for "sb" because it is only dquot->dq_sb) it does
> > increase the stack usage, and that is never a good idea.
> 
> I agree. Probably, it makes sense to rollback sb change. Though this may lead
> to additional line breaks.

Line breaks don't consume stack space ;-).

> > Hmm, allocating "fakedquot" on the stack just to compare it to zero
> > doesn't seem like a good use of space.  What about doing the memcmp()
> > against page_address(ZERO_PAGE(0))?  It might be nice to have a permanent
> > mapping of ZERO_PAGE(0) like void *zero_buffer that can be used for this.
> 
> What do you think about something like this?

> 
> +static union v2_disk_dqblk emptydquot;
> +static union v2_disk_dqblk fakedquot[2] = {
> +	{.disk_dqblk_r0 = {.dqb_itime = __constant_cpu_to_le64(1LLU)}},
> +	{.disk_dqblk_r1 = {.dqb_itime = __constant_cpu_to_le64(1LLU)}}
> +};

Yes, since these structures are constant there are no multi-thread issues.
Not only does this use less stack space, but it is better performing as
well due to less memset() overhead.

NB - it seems you have whitespace at the end of some lines, please remove it.

>@@ -724,9 +724,7 @@ static int v2_read_dquot(struct dquot *d
>		else {
>			ret = 0;
>			/* We need to escape back all-zero structure */
>-			memset(&empty, 0, dqblksz);
>-			DQF_PUT(&empty, rev, dqb_itime, 1);
>-			if (!memcmp(&empty, &ddquot, dqblksz))
>+			if (!memcmp(&fakedquot[rev], &ddquot, dqblksz))
>				DQF_PUT(&ddquot, rev, dqb_itime, 0);
>		}

I wonder if there is a CPU instruction "compare memory with zero" that
would be available for something like this, that could be used like:

			if (!memzcmp(&ddquot, dqblksz))
				DQF_PUT(&ddquot, rev, dqb_itime, 0);

I know I've had to use a few hacks like this to check if some buffer is
zero filled, so having it efficiently done by the CPU would be a win.
It would be possible to use something like generic_find_first_bit():

#define memzcmp(ptr, bytes) (generic_find_first_bit(ptr, bytes*8) >= bytes*8)

which breaks down to an assembly instruction "bsfq" on x86_64 if the
parameter is constant.  That might make it more efficient to have 2
separate codepaths here with a constant "bytes" parameter so that the
larger memzcmp() can be optimized.

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] quota: 64-bit limits with vfs, updated
  2008-03-15 14:45                 ` Andreas Dilger
@ 2008-03-15 18:58                   ` Andrew Perepechko
  2008-03-15 22:47                     ` Andreas Dilger
  0 siblings, 1 reply; 21+ messages in thread
From: Andrew Perepechko @ 2008-03-15 18:58 UTC (permalink / raw)
  To: Jan Kara
  Cc: linux-fsdevel, Johann Lombardi, Zhiyong Landen tian,
	Alex Lyashkov, Andreas Dilger

Introduce 64-bit quota limits support with QFMT_VFS_V0. 

This patch is incremental to quota-do-not-allow-setting-of-quota-limits-to-too-high-values.patch included in -mm tree.

Signed-off-by: Andrew Perepechko <andrew.perepechko@sun.com>

---

 fs/quota_v2.c              |  227 ++++++++++++++++++++++++++++++---------------
 include/linux/dqblk_v2.h   |    1
 include/linux/quota.h      |   10 -
 include/linux/quotaio_v2.h |   34 +++++-
 4 files changed, 186 insertions(+), 86 deletions(-)


diff -rNpu linux.2.6.24.3.old/fs/quota_v2.c linux-2.6.24.3/fs/quota_v2.c
--- linux.2.6.24.3.old/fs/quota_v2.c	2008-03-14 15:34:16.000000000 +0300
+++ linux-2.6.24.3/fs/quota_v2.c	2008-03-15 20:52:26.000000000 +0300
@@ -23,26 +23,67 @@ MODULE_LICENSE("GPL");
 typedef char *dqbuf_t;
 
 #define GETIDINDEX(id, depth) (((id) >> ((V2_DQTREEDEPTH-(depth)-1)*8)) & 0xff)
-#define GETENTRIES(buf) ((struct v2_disk_dqblk *)(((char *)buf)+sizeof(struct v2_disk_dqdbheader)))
+#define GETENTRIES(buf) ((union v2_disk_dqblk *)(((char *)buf) + \
+			 sizeof(struct v2_disk_dqdbheader)))
 
-/* Check whether given file is really vfsv0 quotafile */
-static int v2_check_quota_file(struct super_block *sb, int type)
+static const union v2_disk_dqblk emptydquot;
+static const union v2_disk_dqblk fakedquot[2] = {
+	{.disk_dqblk_r0 = {.dqb_itime = __constant_cpu_to_le64(1LLU)} },
+	{.disk_dqblk_r1 = {.dqb_itime = __constant_cpu_to_le64(1LLU)} }
+};
+
+static inline uint v2_dqblksz(uint rev)
+{
+	uint sz = 0; /* make the compiler happy */
+
+	switch (rev) {
+	case 0:
+		sz = sizeof(struct v2_disk_dqblk_r0);
+		break;
+	case 1:
+		sz = sizeof(struct v2_disk_dqblk_r1);
+		break;
+	default:
+		BUG();
+	}
+
+	return sz;
+}
+
+/* Number of quota entries in a block */
+static inline int v2_dqstrinblk(uint rev)
+{
+	return (V2_DQBLKSIZE-sizeof(struct v2_disk_dqdbheader))/v2_dqblksz(rev);
+}
+
+/* Get revision of a quota file, -1 if it does not look a quota file */
+static int v2_quota_file_revision(struct super_block *sb, int type)
 {
 	struct v2_disk_dqheader dqhead;
 	ssize_t size;
 	static const uint quota_magics[] = V2_INITQMAGICS;
-	static const uint quota_versions[] = V2_INITQVERSIONS;
+	static const uint quota_versions_r0[] = V2_INITQVERSIONS_R0;
+	static const uint quota_versions_r1[] = V2_INITQVERSIONS_R1;
  
 	size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
 	if (size != sizeof(struct v2_disk_dqheader)) {
 		printk("quota_v2: failed read expected=%zd got=%zd\n",
 			sizeof(struct v2_disk_dqheader), size);
-		return 0;
+		return -1;
 	}
-	if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
-	    le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
-		return 0;
-	return 1;
+	if (le32_to_cpu(dqhead.dqh_magic) == quota_magics[type]) {
+		if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r0[type])
+			return 0;
+		if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r1[type])
+			return 1;
+	}
+	return -1;
+}
+
+/* Check whether given file is really vfsv0 quotafile */
+static int v2_check_quota_file(struct super_block *sb, int type)
+{
+	return v2_quota_file_revision(sb, type) != -1;
 }
 
 /* Read information header from quota file */
@@ -51,6 +92,10 @@ static int v2_read_file_info(struct supe
 	struct v2_disk_dqinfo dinfo;
 	struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
 	ssize_t size;
+	int rev;
+
+	rev = v2_quota_file_revision(sb, type);
+	BUG_ON(rev < 0);
 
 	size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
 	       sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
@@ -59,15 +104,22 @@ static int v2_read_file_info(struct supe
 			sb->s_id);
 		return -1;
 	}
-	/* limits are stored as unsigned 32-bit data */
-	info->dqi_maxblimit = 0xffffffff;
-	info->dqi_maxilimit = 0xffffffff;
 	info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
 	info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
 	info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
 	info->u.v2_i.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
 	info->u.v2_i.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
 	info->u.v2_i.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
+
+	info->u.v2_i.dqi_revision = rev;
+	if (rev == 0) {
+		info->dqi_maxblimit = 0xffffffffULL;
+		info->dqi_maxilimit = 0xffffffffULL;
+	} else {
+		info->dqi_maxblimit = 0xffffffffffffffffULL;
+		info->dqi_maxilimit = 0xffffffffffffffffULL;
+	}
+
 	return 0;
 }
 
@@ -97,29 +149,47 @@ static int v2_write_file_info(struct sup
 	return 0;
 }
 
-static void disk2memdqb(struct mem_dqblk *m, struct v2_disk_dqblk *d)
-{
-	m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit);
-	m->dqb_isoftlimit = le32_to_cpu(d->dqb_isoftlimit);
-	m->dqb_curinodes = le32_to_cpu(d->dqb_curinodes);
-	m->dqb_itime = le64_to_cpu(d->dqb_itime);
-	m->dqb_bhardlimit = le32_to_cpu(d->dqb_bhardlimit);
-	m->dqb_bsoftlimit = le32_to_cpu(d->dqb_bsoftlimit);
-	m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
-	m->dqb_btime = le64_to_cpu(d->dqb_btime);
-}
-
-static void mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id)
-{
-	d->dqb_ihardlimit = cpu_to_le32((u32)m->dqb_ihardlimit);
-	d->dqb_isoftlimit = cpu_to_le32((u32)m->dqb_isoftlimit);
-	d->dqb_curinodes = cpu_to_le32((u32)m->dqb_curinodes);
-	d->dqb_itime = cpu_to_le64(m->dqb_itime);
-	d->dqb_bhardlimit = cpu_to_le32((u32)m->dqb_bhardlimit);
-	d->dqb_bsoftlimit = cpu_to_le32((u32)m->dqb_bsoftlimit);
-	d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
-	d->dqb_btime = cpu_to_le64(m->dqb_btime);
-	d->dqb_id = cpu_to_le32(id);
+#define DQ2MQ(v) (sizeof(v) == sizeof(__u64) ? \
+		  (qsize_t)le64_to_cpu(v) : \
+		  (qsize_t)le32_to_cpu(v))
+
+#define MQ2DQ(v, newv) (sizeof(v) == sizeof(__u64) ? \
+			(v = cpu_to_le64((__u64)newv)) : \
+			(v = cpu_to_le32((__u32)newv)))
+
+#define DQF_GET(var, rev, field) (rev == 0 ? \
+		DQ2MQ((var)->disk_dqblk_r0.field) : \
+		DQ2MQ((var)->disk_dqblk_r1.field))
+
+#define DQF_PUT(var, rev, field, val) (rev == 0 ? \
+		MQ2DQ((var)->disk_dqblk_r0.field, val) : \
+		MQ2DQ((var)->disk_dqblk_r1.field, val))
+
+void disk2memdqb(struct mem_dqblk *m, union v2_disk_dqblk *d,
+		 uint rev)
+{
+	m->dqb_ihardlimit = DQF_GET(d, rev, dqb_ihardlimit);
+	m->dqb_isoftlimit = DQF_GET(d, rev, dqb_isoftlimit);
+	m->dqb_curinodes = DQF_GET(d, rev, dqb_curinodes);
+	m->dqb_itime = DQF_GET(d, rev, dqb_itime);
+	m->dqb_bhardlimit = DQF_GET(d, rev, dqb_bhardlimit);
+	m->dqb_bsoftlimit = DQF_GET(d, rev, dqb_bsoftlimit);
+	m->dqb_curspace = DQF_GET(d, rev, dqb_curspace);
+	m->dqb_btime = DQF_GET(d, rev, dqb_btime);
+}
+
+static void mem2diskdqb(union v2_disk_dqblk *d, struct mem_dqblk *m,
+			qid_t id, uint rev)
+{
+	DQF_PUT(d, rev, dqb_ihardlimit, m->dqb_ihardlimit);
+	DQF_PUT(d, rev, dqb_isoftlimit, m->dqb_isoftlimit);
+	DQF_PUT(d, rev, dqb_curinodes, m->dqb_curinodes);
+	DQF_PUT(d, rev, dqb_itime, m->dqb_itime);
+	DQF_PUT(d, rev, dqb_bhardlimit, m->dqb_bhardlimit);
+	DQF_PUT(d, rev, dqb_bsoftlimit, m->dqb_bsoftlimit);
+	DQF_PUT(d, rev, dqb_curspace, m->dqb_curspace);
+	DQF_PUT(d, rev, dqb_btime, m->dqb_btime);
+	DQF_PUT(d, rev, dqb_id, id);
 }
 
 static dqbuf_t getdqbuf(void)
@@ -271,10 +341,10 @@ static uint find_free_dqentry(struct dqu
 {
 	struct super_block *sb = dquot->dq_sb;
 	struct mem_dqinfo *info = sb_dqopt(sb)->info+dquot->dq_type;
-	uint blk, i;
+	uint blk, i, rev = info->u.v2_i.dqi_revision;
+	uint dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev);
 	struct v2_disk_dqdbheader *dh;
-	struct v2_disk_dqblk *ddquot;
-	struct v2_disk_dqblk fakedquot;
+	union v2_disk_dqblk *ddquot;
 	dqbuf_t buf;
 
 	*err = 0;
@@ -301,17 +371,18 @@ static uint find_free_dqentry(struct dqu
 		info->u.v2_i.dqi_free_entry = blk;
 		mark_info_dirty(sb, dquot->dq_type);
 	}
-	if (le16_to_cpu(dh->dqdh_entries)+1 >= V2_DQSTRINBLK)	/* Block will be full? */
+	/* Block will be full? */
+	if (le16_to_cpu(dh->dqdh_entries)+1 >= dqstrinblk)
 		if ((*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) {
 			printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
 			goto out_buf;
 		}
 	dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)+1);
-	memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
 	/* Find free structure in block */
-	for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++);
+	for (i = 0; i < dqstrinblk && memcmp(&emptydquot,
+			ddquot+i, dqblksz); i++);
 #ifdef __QUOTA_V2_PARANOIA
-	if (i == V2_DQSTRINBLK) {
+	if (i == dqstrinblk) {
 		printk(KERN_ERR "VFS: find_free_dqentry(): Data block full but it shouldn't.\n");
 		*err = -EIO;
 		goto out_buf;
@@ -321,7 +392,8 @@ static uint find_free_dqentry(struct dqu
 		printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota data block %u.\n", blk);
 		goto out_buf;
 	}
-	dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+sizeof(struct v2_disk_dqdbheader)+i*sizeof(struct v2_disk_dqblk);
+	dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+
+			sizeof(struct v2_disk_dqdbheader)+i*dqblksz;
 	freedqbuf(buf);
 	return blk;
 out_buf:
@@ -395,7 +467,9 @@ static int v2_write_dquot(struct dquot *
 {
 	int type = dquot->dq_type;
 	ssize_t ret;
-	struct v2_disk_dqblk ddquot, empty;
+	union v2_disk_dqblk ddquot;
+	uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.dqi_revision;
+	uint dqblksz = v2_dqblksz(rev);
 
 	/* dq_off is guarded by dqio_mutex */
 	if (!dquot->dq_off)
@@ -404,18 +478,18 @@ static int v2_write_dquot(struct dquot *
 			return ret;
 		}
 	spin_lock(&dq_data_lock);
-	mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id);
+	mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id, rev);
 	/* Argh... We may need to write structure full of zeroes but that would be
 	 * treated as an empty place by the rest of the code. Format change would
 	 * be definitely cleaner but the problems probably are not worth it */
-	memset(&empty, 0, sizeof(struct v2_disk_dqblk));
-	if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
-		ddquot.dqb_itime = cpu_to_le64(1);
+	if (!memcmp(&emptydquot, &ddquot, dqblksz))
+		DQF_PUT(&ddquot, rev, dqb_itime, 1);
 	spin_unlock(&dq_data_lock);
 	ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type,
-	      (char *)&ddquot, sizeof(struct v2_disk_dqblk), dquot->dq_off);
-	if (ret != sizeof(struct v2_disk_dqblk)) {
-		printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", dquot->dq_sb->s_id);
+	      (char *)&ddquot, dqblksz, dquot->dq_off);
+	if (ret != dqblksz) {
+		printk(KERN_WARNING "VFS: dquota write failed on dev %s\n",
+			dquot->dq_sb->s_id);
 		if (ret >= 0)
 			ret = -ENOSPC;
 	}
@@ -434,6 +508,7 @@ static int free_dqentry(struct dquot *dq
 	struct v2_disk_dqdbheader *dh;
 	dqbuf_t buf = getdqbuf();
 	int ret = 0;
+	uint rev = sb_dqopt(sb)->info[type].u.v2_i.dqi_revision;
 
 	if (!buf)
 		return -ENOMEM;
@@ -459,8 +534,8 @@ static int free_dqentry(struct dquot *dq
 	}
 	else {
 		memset(buf+(dquot->dq_off & ((1 << V2_DQBLKSIZE_BITS)-1)), 0,
-		  sizeof(struct v2_disk_dqblk));
-		if (le16_to_cpu(dh->dqdh_entries) == V2_DQSTRINBLK-1) {
+		  v2_dqblksz(rev));
+		if (le16_to_cpu(dh->dqdh_entries) == v2_dqstrinblk(rev)-1) {
 			/* Insert will write block itself */
 			if ((ret = insert_free_dqentry(sb, type, buf, blk)) < 0) {
 				printk(KERN_ERR "VFS: Can't insert quota data block (%u) to free entry list.\n", blk);
@@ -538,27 +613,29 @@ static loff_t find_block_dqentry(struct
 	dqbuf_t buf = getdqbuf();
 	loff_t ret = 0;
 	int i;
-	struct v2_disk_dqblk *ddquot = GETENTRIES(buf);
+	union v2_disk_dqblk *ddquot = GETENTRIES(buf);
+	int type = dquot->dq_type;
+	uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.dqi_revision;
+	uint dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev);
 
 	if (!buf)
 		return -ENOMEM;
-	if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
+
+	ret = read_blk(dquot->dq_sb, type, blk, buf);
+	if (ret < 0) {
 		printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
 		goto out_buf;
 	}
 	if (dquot->dq_id)
-		for (i = 0; i < V2_DQSTRINBLK &&
-		     le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++);
+		for (i = 0; i < dqstrinblk &&
+		     DQF_GET(ddquot+i, rev, dqb_id) != dquot->dq_id; i++);
 	else {	/* ID 0 as a bit more complicated searching... */
-		struct v2_disk_dqblk fakedquot;
-
-		memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
-		for (i = 0; i < V2_DQSTRINBLK; i++)
-			if (!le32_to_cpu(ddquot[i].dqb_id) &&
-			    memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)))
+		for (i = 0; i < dqstrinblk; i++)
+			if (!DQF_GET(ddquot+i, rev, dqb_id) &&
+			    memcmp(&emptydquot, ddquot+i, dqblksz))
 				break;
 	}
-	if (i == V2_DQSTRINBLK) {
+	if (i == dqstrinblk) {
 		printk(KERN_ERR "VFS: Quota for id %u referenced "
 		  "but not present.\n", dquot->dq_id);
 		ret = -EIO;
@@ -566,7 +643,7 @@ static loff_t find_block_dqentry(struct
 	}
 	else
 		ret = (blk << V2_DQBLKSIZE_BITS) + sizeof(struct
-		  v2_disk_dqdbheader) + i * sizeof(struct v2_disk_dqblk);
+		  v2_disk_dqdbheader) + i * dqblksz;
 out_buf:
 	freedqbuf(buf);
 	return ret;
@@ -608,7 +685,7 @@ static int v2_read_dquot(struct dquot *d
 {
 	int type = dquot->dq_type;
 	loff_t offset;
-	struct v2_disk_dqblk ddquot, empty;
+	union v2_disk_dqblk ddquot;
 	int ret = 0;
 
 #ifdef __QUOTA_V2_PARANOIA
@@ -629,25 +706,25 @@ static int v2_read_dquot(struct dquot *d
 		ret = offset;
 	}
 	else {
+		uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.
+			   dqi_revision, dqblksz = v2_dqblksz(rev);
 		dquot->dq_off = offset;
-		if ((ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
-		    (char *)&ddquot, sizeof(struct v2_disk_dqblk), offset))
-		    != sizeof(struct v2_disk_dqblk)) {
+		ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
+					   (char *)&ddquot, dqblksz, offset);
+		if (ret != dqblksz) {
 			if (ret >= 0)
 				ret = -EIO;
 			printk(KERN_ERR "VFS: Error while reading quota "
 			  "structure for id %u.\n", dquot->dq_id);
-			memset(&ddquot, 0, sizeof(struct v2_disk_dqblk));
+			memset(&ddquot, 0, dqblksz);
 		}
 		else {
 			ret = 0;
 			/* We need to escape back all-zero structure */
-			memset(&empty, 0, sizeof(struct v2_disk_dqblk));
-			empty.dqb_itime = cpu_to_le64(1);
-			if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
-				ddquot.dqb_itime = 0;
+			if (!memcmp(&fakedquot[rev], &ddquot, dqblksz))
+				DQF_PUT(&ddquot, rev, dqb_itime, 0);
 		}
-		disk2memdqb(&dquot->dq_dqb, &ddquot);
+		disk2memdqb(&dquot->dq_dqb, &ddquot, rev);
 		if (!dquot->dq_dqb.dqb_bhardlimit &&
 			!dquot->dq_dqb.dqb_bsoftlimit &&
 			!dquot->dq_dqb.dqb_ihardlimit &&
diff -rNpu linux.2.6.24.3.old/include/linux/dqblk_v2.h linux-2.6.24.3/include/linux/dqblk_v2.h
--- linux.2.6.24.3.old/include/linux/dqblk_v2.h	2008-02-26 03:20:20.000000000 +0300
+++ linux-2.6.24.3/include/linux/dqblk_v2.h	2008-03-13 22:01:48.000000000 +0300
@@ -21,6 +21,7 @@ struct v2_mem_dqinfo {
 	unsigned int dqi_blocks;
 	unsigned int dqi_free_blk;
 	unsigned int dqi_free_entry;
+	unsigned int dqi_revision;
 };
 
 #endif /* _LINUX_DQBLK_V2_H */
diff -rNpu linux.2.6.24.3.old/include/linux/quota.h linux-2.6.24.3/include/linux/quota.h
--- linux.2.6.24.3.old/include/linux/quota.h	2008-03-14 15:34:16.000000000 +0300
+++ linux-2.6.24.3/include/linux/quota.h	2008-03-13 22:01:48.000000000 +0300
@@ -181,12 +181,12 @@ extern spinlock_t dq_data_lock;
  * Data for one user/group kept in memory
  */
 struct mem_dqblk {
-	__u32 dqb_bhardlimit;	/* absolute limit on disk blks alloc */
-	__u32 dqb_bsoftlimit;	/* preferred limit on disk blks */
+	qsize_t dqb_bhardlimit;	/* absolute limit on disk blks alloc */
+	qsize_t dqb_bsoftlimit;	/* preferred limit on disk blks */
 	qsize_t dqb_curspace;	/* current used space */
-	__u32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
-	__u32 dqb_isoftlimit;	/* preferred inode limit */
-	__u32 dqb_curinodes;	/* current # allocated inodes */
+	qsize_t dqb_ihardlimit;	/* absolute limit on allocated inodes */
+	qsize_t dqb_isoftlimit;	/* preferred inode limit */
+	qsize_t dqb_curinodes;	/* current # allocated inodes */
 	time_t dqb_btime;	/* time limit for excessive disk use */
 	time_t dqb_itime;	/* time limit for excessive inode use */
 };
diff -rNpu linux.2.6.24.3.old/include/linux/quotaio_v2.h linux-2.6.24.3/include/linux/quotaio_v2.h
--- linux.2.6.24.3.old/include/linux/quotaio_v2.h	2008-02-26 03:20:20.000000000 +0300
+++ linux-2.6.24.3/include/linux/quotaio_v2.h	2008-03-13 22:01:48.000000000 +0300
@@ -16,28 +16,51 @@
 	0xd9c01927	/* GRPQUOTA */\
 }
 
-#define V2_INITQVERSIONS {\
+#define V2_INITQVERSIONS_R0 {\
 	0,		/* USRQUOTA */\
 	0		/* GRPQUOTA */\
 }
 
+#define V2_INITQVERSIONS_R1 {\
+	1,		/* USRQUOTA */\
+	1		/* GRPQUOTA */\
+}
+
 /*
  * The following structure defines the format of the disk quota file
  * (as it appears on disk) - the file is a radix tree whose leaves point
  * to blocks of these structures.
  */
-struct v2_disk_dqblk {
+struct v2_disk_dqblk_r0 {
 	__le32 dqb_id;		/* id this quota applies to */
 	__le32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
 	__le32 dqb_isoftlimit;	/* preferred inode limit */
 	__le32 dqb_curinodes;	/* current # allocated inodes */
-	__le32 dqb_bhardlimit;	/* absolute limit on disk space (in QUOTABLOCK_SIZE) */
-	__le32 dqb_bsoftlimit;	/* preferred limit on disk space (in QUOTABLOCK_SIZE) */
+	__le32 dqb_bhardlimit;	/* absolute limit on disk space */
+	__le32 dqb_bsoftlimit;	/* preferred limit on disk space */
+	__le64 dqb_curspace;	/* current space occupied (in bytes) */
+	__le64 dqb_btime;	/* time limit for excessive disk use */
+	__le64 dqb_itime;	/* time limit for excessive inode use */
+};
+
+struct v2_disk_dqblk_r1 {
+	__le32 dqb_id;		/* id this quota applies to */
+	__le32 dqb_padding;	/* padding field */
+	__le64 dqb_ihardlimit;	/* absolute limit on allocated inodes */
+	__le64 dqb_isoftlimit;	/* preferred inode limit */
+	__le64 dqb_curinodes;	/* current # allocated inodes */
+	__le64 dqb_bhardlimit;	/* absolute limit on disk space */
+	__le64 dqb_bsoftlimit;	/* preferred limit on disk space */
 	__le64 dqb_curspace;	/* current space occupied (in bytes) */
 	__le64 dqb_btime;	/* time limit for excessive disk use */
 	__le64 dqb_itime;	/* time limit for excessive inode use */
 };
 
+union v2_disk_dqblk {
+	struct v2_disk_dqblk_r0 disk_dqblk_r0;
+	struct v2_disk_dqblk_r1 disk_dqblk_r1;
+};
+
 /*
  * Here are header structures as written on disk and their in-memory copies
  */
@@ -59,7 +82,7 @@ struct v2_disk_dqinfo {
 
 /*
  *  Structure of header of block with quota structures. It is padded to 16 bytes so
- *  there will be space for exactly 21 quota-entries in a block
+ *  there will be space for exactly 21 (r0) or 14 (r1) quota-entries in a block
  */
 struct v2_disk_dqdbheader {
 	__le32 dqdh_next_free;	/* Number of next block with free entry */
@@ -74,6 +97,5 @@ struct v2_disk_dqdbheader {
 #define V2_DQBLKSIZE	(1 << V2_DQBLKSIZE_BITS)	/* Size of block with quota structures */
 #define V2_DQTREEOFF	1		/* Offset of tree in file in blocks */
 #define V2_DQTREEDEPTH	4		/* Depth of quota tree */
-#define V2_DQSTRINBLK	((V2_DQBLKSIZE - sizeof(struct v2_disk_dqdbheader)) / sizeof(struct v2_disk_dqblk))	/* Number of entries in one blocks */
 
 #endif /* _LINUX_QUOTAIO_V2_H */

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] quota: 64-bit limits with vfs, updated
  2008-03-15 18:58                   ` [RFC] quota: 64-bit limits with vfs, updated Andrew Perepechko
@ 2008-03-15 22:47                     ` Andreas Dilger
  2008-03-16  1:14                       ` Andrew Perepechko
  0 siblings, 1 reply; 21+ messages in thread
From: Andreas Dilger @ 2008-03-15 22:47 UTC (permalink / raw)
  To: Andrew Perepechko
  Cc: Jan Kara, linux-fsdevel, Johann Lombardi, Zhiyong Landen tian,
	Alex Lyashkov

On Mar 15, 2008  21:58 +0300, Andrew Perepechko wrote:
> +static inline uint v2_dqblksz(uint rev)
> +{
> +	uint sz = 0; /* make the compiler happy */
> +
> +	switch (rev) {
> +	case 0:
> +		sz = sizeof(struct v2_disk_dqblk_r0);
> +		break;
> +	case 1:
> +		sz = sizeof(struct v2_disk_dqblk_r1);
> +		break;
> +	default:
> +		BUG();
> +	}
> +
> +	return sz;
> +}

Is this ever used on values that are read from the disk?  If yes, then
having it BUG() because there is corruption on the disk is bad.

> +static int v2_quota_file_revision(struct super_block *sb, int type)
>  {
>  	size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
>  	if (size != sizeof(struct v2_disk_dqheader)) {
>  		printk("quota_v2: failed read expected=%zd got=%zd\n",
>  			sizeof(struct v2_disk_dqheader), size);
> -		return 0;
> +		return -1;
>  	}
> -	if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
> -	    le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
> -		return 0;
> -	return 1;
> +	if (le32_to_cpu(dqhead.dqh_magic) == quota_magics[type]) {
> +		if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r0[type])
> +			return 0;
> +		if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r1[type])
> +			return 1;
> +	}
> +	return -1;
> +}
> @@ -51,6 +92,10 @@ static int v2_read_file_info(struct supe
> +
> +	rev = v2_quota_file_revision(sb, type);
> +	BUG_ON(rev < 0);
 
This is no good - if there is an error from ->quota_read() then the kernel
will BUG() here.  You need proper (non fatal) error handling when you are
first loading the quota information into memory.

> +/* Check whether given file is really vfsv0 quotafile */
> +static int v2_check_quota_file(struct super_block *sb, int type)
> +{
> +	return v2_quota_file_revision(sb, type) != -1;
>  }

This could be inline.

> +#define DQ2MQ(v) (sizeof(v) == sizeof(__u64) ? \
> +		  (qsize_t)le64_to_cpu(v) : \
> +		  (qsize_t)le32_to_cpu(v))
> +
> +#define MQ2DQ(v, newv) (sizeof(v) == sizeof(__u64) ? \
> +			(v = cpu_to_le64((__u64)newv)) : \
> +			(v = cpu_to_le32((__u32)newv)))
> +
> +#define DQF_GET(var, rev, field) (rev == 0 ? \
> +		DQ2MQ((var)->disk_dqblk_r0.field) : \
> +		DQ2MQ((var)->disk_dqblk_r1.field))
> +
> +#define DQF_PUT(var, rev, field, val) (rev == 0 ? \
> +		MQ2DQ((var)->disk_dqblk_r0.field, val) : \
> +		MQ2DQ((var)->disk_dqblk_r1.field, val))
> +
> +void disk2memdqb(struct mem_dqblk *m, union v2_disk_dqblk *d,
> +		 uint rev)
> +{
> +	m->dqb_ihardlimit = DQF_GET(d, rev, dqb_ihardlimit);
> +	m->dqb_isoftlimit = DQF_GET(d, rev, dqb_isoftlimit);
> +	m->dqb_curinodes = DQF_GET(d, rev, dqb_curinodes);
> +	m->dqb_itime = DQF_GET(d, rev, dqb_itime);
> +	m->dqb_bhardlimit = DQF_GET(d, rev, dqb_bhardlimit);
> +	m->dqb_bsoftlimit = DQF_GET(d, rev, dqb_bsoftlimit);
> +	m->dqb_curspace = DQF_GET(d, rev, dqb_curspace);
> +	m->dqb_btime = DQF_GET(d, rev, dqb_btime);
> +}

While this produces nice looking code, it isn't very efficient to run.
It has 8 checks for "rev" per call, and it would have an additional
8 checks for "sizeof" except those are constant and optimized away
by the compiler.  If you can convince me that the compiler will also
optimize the 8 checks for "rev" into a single branch then it can
stay, otherwise you should split it into 2 large sections based on
"rev" and then decode r0 and r1 structures separately.

> @@ -301,17 +371,18 @@ static uint find_free_dqentry(struct dqu
> +	/* Block will be full? */
> +	if (le16_to_cpu(dh->dqdh_entries)+1 >= dqstrinblk)
>  		if ((*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) {
>  			printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
>  			goto out_buf;
>  		}

Can you please clean this up a bit while here:

	/* Block will be full? */
	if (le16_to_cpu(dh->dqdh_entries) + 1 >= dqstrinblk &&
	    (*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) {
		printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove "
				"block (%u) from entry free list.\n", blk);
		goto out_buf;
	}

> +	for (i = 0; i < dqstrinblk && memcmp(&emptydquot,
> +			ddquot+i, dqblksz); i++);

This is also quite easy to be confused, please write it as:

	for (i = 0; i < dqstrinblk && memcmp(&emptydquot,ddquot+i,dqblksz); i++)
		/* empty loop to count used dquot structs in block */;

In fact, it would be even better to just increment "ddquot" in the loop to
avoid having to re-do the offset each time:

	for (i = 0; i < dqstrinblk && memcmp(&emptydquot, ddquot, dqblksz);
	     i++, ddquot++)
		/* empty loop to count used dquot structs in block */;

> -	dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+sizeof(struct v2_disk_dqdbheader)+i*sizeof(struct v2_disk_dqblk);
> +	dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+
> +			sizeof(struct v2_disk_dqdbheader)+i*dqblksz;

With the above change to ddquot you can then use it directly here:

	dquot->dq_off = (blk << V2_DQBLKSIZE_BITS) +
			((char *)ddquot - (char *)buf);

> @@ -395,7 +467,9 @@ static int v2_write_dquot(struct dquot *
>  {
>  	int type = dquot->dq_type;

type is only used twice in this function, please remove it from the stack.

> @@ -538,27 +613,29 @@ static loff_t find_block_dqentry(struct
> +	union v2_disk_dqblk *ddquot = GETENTRIES(buf);
> +	int type = dquot->dq_type;

type only used twice in this function, please remove from stack.

> +	uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.dqi_revision;
> +	uint dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev);
>  
>  	if (dquot->dq_id)
> -		for (i = 0; i < V2_DQSTRINBLK &&
> -		     le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++);
> +		for (i = 0; i < dqstrinblk &&
> +		     DQF_GET(ddquot+i, rev, dqb_id) != dquot->dq_id; i++);
>  	else {	/* ID 0 as a bit more complicated searching... */

Spaces around " + ", and please add a { } around this block because second
part has { } already.

> +		for (i = 0; i < dqstrinblk; i++)
> +			if (!DQF_GET(ddquot+i, rev, dqb_id) &&
> +			    memcmp(&emptydquot, ddquot+i, dqblksz))
>  				break;
>  	}

Spaces around " + " here also.  Actually, please run the patch through
"checkpatch.pl" to find any similar issues.

> @@ -608,7 +685,7 @@ static int v2_read_dquot(struct dquot *d
>  {
>  	int type = dquot->dq_type;
>  	loff_t offset;
> -	struct v2_disk_dqblk ddquot, empty;
> +	union v2_disk_dqblk ddquot;

Please remove type from stack.

> @@ -629,25 +706,25 @@ static int v2_read_dquot(struct dquot *d
> +		uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.
> +			   dqi_revision, dqblksz = v2_dqblksz(rev);

Use another line with "uint dqblksz = v2_dqblksz(rev);" here.

>  struct mem_dqblk {
> -	__u32 dqb_bhardlimit;	/* absolute limit on disk blks alloc */
> -	__u32 dqb_bsoftlimit;	/* preferred limit on disk blks */
> +	qsize_t dqb_bhardlimit;	/* absolute limit on disk blks alloc */
> +	qsize_t dqb_bsoftlimit;	/* preferred limit on disk blks */
>  	qsize_t dqb_curspace;	/* current used space */
> -	__u32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
> -	__u32 dqb_isoftlimit;	/* preferred inode limit */
> -	__u32 dqb_curinodes;	/* current # allocated inodes */
> +	qsize_t dqb_ihardlimit;	/* absolute limit on allocated inodes */
> +	qsize_t dqb_isoftlimit;	/* preferred inode limit */
> +	qsize_t dqb_curinodes;	/* current # allocated inodes */
>  	time_t dqb_btime;	/* time limit for excessive disk use */
>  	time_t dqb_itime;	/* time limit for excessive inode use */
>  };

Is there a reason to use "qsize_t" instead of just using __u64 directly?

> +++ linux-2.6.24.3/include/linux/quotaio_v2.h	2008-03-13 22:01:48.000000000 +0300
> @@ -16,28 +16,51 @@
> @@ -59,7 +82,7 @@ struct v2_disk_dqinfo {
>  
>  /*
>   *  Structure of header of block with quota structures. It is padded to 16 bytes so
> - *  there will be space for exactly 21 quota-entries in a block
> + *  there will be space for exactly 21 (r0) or 14 (r1) quota-entries in a block
>   */

Hmm, when this comment says "block" does it mean "1024-byte block" or
filesystem block?  For 1024-byte block it is true that both 48-byte (r0) and
72-byte (r1) records fit evenly, but for 4096-byte blocks 72-byte records
do not fit evenly.  Is that a problem?

Please line wrap or adjust the comment to fit in 80 columns.

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] quota: 64-bit limits with vfs, updated
  2008-03-15 22:47                     ` Andreas Dilger
@ 2008-03-16  1:14                       ` Andrew Perepechko
  2008-03-16 11:21                         ` Andrew Perepechko
  2008-03-17 14:51                         ` Jan Kara
  0 siblings, 2 replies; 21+ messages in thread
From: Andrew Perepechko @ 2008-03-16  1:14 UTC (permalink / raw)
  To: Andreas Dilger
  Cc: Jan Kara, linux-fsdevel, Johann Lombardi, Zhiyong Landen tian,
	Alex Lyashkov

Hello, Andreas

On Sunday 16 March 2008 01:47:26 Andreas Dilger wrote:
> 
> Is this ever used on values that are read from the disk?  If yes, then
> having it BUG() because there is corruption on the disk is bad.

This is used on the revision variable, which is either 0 or 1, as
determined by a special function. As this special function cannot
return any other values, having rev != 0 && rev != 1 just means we got
memory corruption.

> This is no good - if there is an error from ->quota_read() then the kernel
> will BUG() here.  You need proper (non fatal) error handling when you are
> first loading the quota information into memory.

My point is that something very, very nasty has happened if we succeed on the first
version check and fail right after that. In principle, we don't even need this
second check, as we have already done the check at this point. But using the
results of the previous check would need some interface changes (sigh).

I'll change the BUG_ON to return an error, but my point is that if it happens, the
user is not expecting any sane error handling, or it just won't help him. :)

> 
> This could be inline.
> 

I agree.

> 
> While this produces nice looking code, it isn't very efficient to run.
> It has 8 checks for "rev" per call, and it would have an additional
> 8 checks for "sizeof" except those are constant and optimized away
> by the compiler.  If you can convince me that the compiler will also
> optimize the 8 checks for "rev" into a single branch then it can
> stay, otherwise you should split it into 2 large sections based on
> "rev" and then decode r0 and r1 structures separately.
> 

I'll check the code produced by gcc.

> > @@ -301,17 +371,18 @@ static uint find_free_dqentry(struct dqu
> > +	/* Block will be full? */
> > +	if (le16_to_cpu(dh->dqdh_entries)+1 >= dqstrinblk)
> >  		if ((*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) {
> >  			printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
> >  			goto out_buf;
> >  		}
> 
> Can you please clean this up a bit while here:
> 
> 	/* Block will be full? */
> 	if (le16_to_cpu(dh->dqdh_entries) + 1 >= dqstrinblk &&
> 	    (*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) {
> 		printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove "
> 				"block (%u) from entry free list.\n", blk);
> 		goto out_buf;
> 	}

While this makes sense to me, this change would be prohibited by
checkpatch.pl because it adds the assignment
(*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) inside a condition.
Indeed, we could go further with the change, but
code refactoring/cleaning is not the purpose of my patch. :)

> 
> > +	for (i = 0; i < dqstrinblk && memcmp(&emptydquot,
> > +			ddquot+i, dqblksz); i++);
> 
> This is also quite easy to be confused, please write it as:
> 
> 	for (i = 0; i < dqstrinblk && memcmp(&emptydquot,ddquot+i,dqblksz); i++)
> 		/* empty loop to count used dquot structs in block */;

You killed two spaces to fit the line in 80 chars. IIRC, checkpatch.pl requires that
args be separated with spaces.

> 
> In fact, it would be even better to just increment "ddquot" in the loop to
> avoid having to re-do the offset each time:
> 
> 	for (i = 0; i < dqstrinblk && memcmp(&emptydquot, ddquot, dqblksz);
> 	     i++, ddquot++)
> 		/* empty loop to count used dquot structs in block */;
> 
> > -	dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+sizeof(struct v2_disk_dqdbheader)+i*sizeof(struct v2_disk_dqblk);
> > +	dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+
> > +			sizeof(struct v2_disk_dqdbheader)+i*dqblksz;
> 
> With the above change to ddquot you can then use it directly here:
> 
> 	dquot->dq_off = (blk << V2_DQBLKSIZE_BITS) +
> 			((char *)ddquot - (char *)buf);

I agree.

> 
> > @@ -395,7 +467,9 @@ static int v2_write_dquot(struct dquot *
> >  {
> >  	int type = dquot->dq_type;
> 
> type is only used twice in this function, please remove it from the stack.
> 
> > @@ -538,27 +613,29 @@ static loff_t find_block_dqentry(struct
> > +	union v2_disk_dqblk *ddquot = GETENTRIES(buf);
> > +	int type = dquot->dq_type;
> 
> type only used twice in this function, please remove from stack.
> 
> > +	uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.dqi_revision;
> > +	uint dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev);
> >  
> >  	if (dquot->dq_id)
> > -		for (i = 0; i < V2_DQSTRINBLK &&
> > -		     le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++);
> > +		for (i = 0; i < dqstrinblk &&
> > +		     DQF_GET(ddquot+i, rev, dqb_id) != dquot->dq_id; i++);
> >  	else {	/* ID 0 as a bit more complicated searching... */
> 
> Spaces around " + ", and please add a { } around this block because second
> part has { } already.
> 
> > +		for (i = 0; i < dqstrinblk; i++)
> > +			if (!DQF_GET(ddquot+i, rev, dqb_id) &&
> > +			    memcmp(&emptydquot, ddquot+i, dqblksz))
> >  				break;
> >  	}
> 
> Spaces around " + " here also.  Actually, please run the patch through
> "checkpatch.pl" to find any similar issues.
> 
> > @@ -608,7 +685,7 @@ static int v2_read_dquot(struct dquot *d
> >  {
> >  	int type = dquot->dq_type;
> >  	loff_t offset;
> > -	struct v2_disk_dqblk ddquot, empty;
> > +	union v2_disk_dqblk ddquot;
> 
> Please remove type from stack.
> 

These comments refer mostly to enhancements to the original code. While I agree with most of them,
I see no reason why they should be part of this very patch. As for checkpatch.pl, I did run
it before emailing the patch, and it did not flag the missing spaces around "+" as an issue:

panda:/usr/src/linux-2.6.24.3 # md5sum /usr/src/quota.patch
e306aaf884b11709d9e744dfa5e89672  /usr/src/quota.patch
panda:/usr/src/linux-2.6.24.3 # scripts/checkpatch.pl /usr/src/quota.patch
ERROR: Missing Signed-off-by: line(s)

total: 1 errors, 0 warnings, 460 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.


> > @@ -629,25 +706,25 @@ static int v2_read_dquot(struct dquot *d
> > +		uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.
> > +			   dqi_revision, dqblksz = v2_dqblksz(rev);
> 
> Use another line with "uint dqblksz = v2_dqblksz(rev);" here.
> 

I agree.

> >  struct mem_dqblk {
> > -	__u32 dqb_bhardlimit;	/* absolute limit on disk blks alloc */
> > -	__u32 dqb_bsoftlimit;	/* preferred limit on disk blks */
> > +	qsize_t dqb_bhardlimit;	/* absolute limit on disk blks alloc */
> > +	qsize_t dqb_bsoftlimit;	/* preferred limit on disk blks */
> >  	qsize_t dqb_curspace;	/* current used space */
> > -	__u32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
> > -	__u32 dqb_isoftlimit;	/* preferred inode limit */
> > -	__u32 dqb_curinodes;	/* current # allocated inodes */
> > +	qsize_t dqb_ihardlimit;	/* absolute limit on allocated inodes */
> > +	qsize_t dqb_isoftlimit;	/* preferred inode limit */
> > +	qsize_t dqb_curinodes;	/* current # allocated inodes */
> >  	time_t dqb_btime;	/* time limit for excessive disk use */
> >  	time_t dqb_itime;	/* time limit for excessive inode use */
> >  };
> 
> Is there a reason to use "qsize_t" instead of just using __u64 directly?
> 

Well, I'm not sure if there's a special meaning for qsize_t. I have only followed
the dqb_curspace convention, which is of qsize_t type. While I see no
special usage for qsize_t (well, it looks nicer than __u64 :), I at the same time
see no problem with using qsize_t. Maybe Jan will tell us more about qsize_t.

> > +++ linux-2.6.24.3/include/linux/quotaio_v2.h	2008-03-13 22:01:48.000000000 +0300
> > @@ -16,28 +16,51 @@
> > @@ -59,7 +82,7 @@ struct v2_disk_dqinfo {
> >  
> >  /*
> >   *  Structure of header of block with quota structures. It is padded to 16 bytes so
> > - *  there will be space for exactly 21 quota-entries in a block
> > + *  there will be space for exactly 21 (r0) or 14 (r1) quota-entries in a block
> >   */
> 
> Hmm, when this comment says "block" does it mean "1024-byte block" or
> filesystem block?  For 1024-byte block it is true that both 48-byte (r0) and
> 72-byte (r1) records fit evenly, but for 4096-byte blocks 72-byte records
> do not fit evenly.  Is that a problem?

The quota file is internally divided into 1024-byte blocks (see the V2_DQBLKSIZE constant), each
consisting of a block header (which is used for free space management) and a fixed number
of quota entries. This is not related in any way to fs blocks. The quota format module
interacts with the underlying storage only through the quota_read/quota_write interface.

I'm going to update the patch with respect to your comments.
Btw, I lost the "static" before disk2memdqb; I'm going to fix this one too.

Thanks and cheers.
Andrew.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] quota: 64-bit limits with vfs, updated
  2008-03-16  1:14                       ` Andrew Perepechko
@ 2008-03-16 11:21                         ` Andrew Perepechko
  2008-03-17 14:35                           ` Jan Kara
  2008-03-17 14:51                         ` Jan Kara
  1 sibling, 1 reply; 21+ messages in thread
From: Andrew Perepechko @ 2008-03-16 11:21 UTC (permalink / raw)
  To: Jan Kara
  Cc: Andreas Dilger, linux-fsdevel, Johann Lombardi,
	Zhiyong Landen tian, Alex Lyashkov

Introduce 64-bit quota limits support with QFMT_VFS_V0. 

This patch is incremental to quota-do-not-allow-setting-of-quota-limits-to-too-high-values.patch included in -mm tree.

Signed-off-by: Andrew Perepechko <andrew.perepechko@sun.com>

---

 fs/quota_v2.c              |  263 ++++++++++++++++++++++++++++++++-------------
 include/linux/dqblk_v2.h   |    1
 include/linux/quota.h      |   10 -
 include/linux/quotaio_v2.h |   34 ++++-
 4 files changed, 222 insertions(+), 86 deletions(-)

diff -rNpu linux.2.6.24.3.old/fs/quota_v2.c linux-2.6.24.3/fs/quota_v2.c
--- linux.2.6.24.3.old/fs/quota_v2.c	2008-03-14 15:34:16.000000000 +0300
+++ linux-2.6.24.3/fs/quota_v2.c	2008-03-16 13:46:01.000000000 +0300
@@ -23,26 +23,64 @@ MODULE_LICENSE("GPL");
 typedef char *dqbuf_t;
 
 #define GETIDINDEX(id, depth) (((id) >> ((V2_DQTREEDEPTH-(depth)-1)*8)) & 0xff)
-#define GETENTRIES(buf) ((struct v2_disk_dqblk *)(((char *)buf)+sizeof(struct v2_disk_dqdbheader)))
+#define GETENTRIES(buf) ((union v2_disk_dqblk *)(((char *)buf) + \
+			 sizeof(struct v2_disk_dqdbheader)))
+#define REV_ASSERT(r) BUG_ON((rev) != 0 && (rev) != 1)
 
-/* Check whether given file is really vfsv0 quotafile */
-static int v2_check_quota_file(struct super_block *sb, int type)
+static const union v2_disk_dqblk emptydquot;
+static const union v2_disk_dqblk fakedquot[2] = {
+	{.r0 = {.dqb_itime = __constant_cpu_to_le64(1LLU)} },
+	{.r1 = {.dqb_itime = __constant_cpu_to_le64(1LLU)} }
+};
+
+static inline uint v2_dqblksz(uint rev)
+{
+	uint sz;
+
+	REV_ASSERT(rev);
+
+	if (rev == 0)
+		sz = sizeof(struct v2_disk_dqblk_r0);
+	else
+		sz = sizeof(struct v2_disk_dqblk_r1);
+
+	return sz;
+}
+
+/* Number of quota entries in a block */
+static inline int v2_dqstrinblk(uint rev)
+{
+	return (V2_DQBLKSIZE-sizeof(struct v2_disk_dqdbheader))/v2_dqblksz(rev);
+}
+
+/* Get revision of a quota file, -1 if it does not look a quota file */
+static int v2_quota_file_revision(struct super_block *sb, int type)
 {
 	struct v2_disk_dqheader dqhead;
 	ssize_t size;
 	static const uint quota_magics[] = V2_INITQMAGICS;
-	static const uint quota_versions[] = V2_INITQVERSIONS;
+	static const uint quota_versions_r0[] = V2_INITQVERSIONS_R0;
+	static const uint quota_versions_r1[] = V2_INITQVERSIONS_R1;
  
 	size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
 	if (size != sizeof(struct v2_disk_dqheader)) {
 		printk("quota_v2: failed read expected=%zd got=%zd\n",
 			sizeof(struct v2_disk_dqheader), size);
-		return 0;
+		return -1;
 	}
-	if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
-	    le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
-		return 0;
-	return 1;
+	if (le32_to_cpu(dqhead.dqh_magic) == quota_magics[type]) {
+		if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r0[type])
+			return 0;
+		if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r1[type])
+			return 1;
+	}
+	return -1;
+}
+
+/* Check whether given file is really vfsv0 quotafile */
+static inline int v2_check_quota_file(struct super_block *sb, int type)
+{
+	return v2_quota_file_revision(sb, type) != -1;
 }
 
 /* Read information header from quota file */
@@ -51,6 +89,13 @@ static int v2_read_file_info(struct supe
 	struct v2_disk_dqinfo dinfo;
 	struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
 	ssize_t size;
+	int rev;
+
+	rev = v2_quota_file_revision(sb, type);
+	if (rev < 0) {
+		printk(KERN_WARNING "Second quota file check failed.\n");
+		return -1;
+	}
 
 	size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
 	       sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
@@ -59,15 +104,22 @@ static int v2_read_file_info(struct supe
 			sb->s_id);
 		return -1;
 	}
-	/* limits are stored as unsigned 32-bit data */
-	info->dqi_maxblimit = 0xffffffff;
-	info->dqi_maxilimit = 0xffffffff;
 	info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
 	info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
 	info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
 	info->u.v2_i.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
 	info->u.v2_i.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
 	info->u.v2_i.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
+
+	info->u.v2_i.dqi_revision = rev;
+	if (rev == 0) {
+		info->dqi_maxblimit = 0xffffffffULL;
+		info->dqi_maxilimit = 0xffffffffULL;
+	} else {
+		info->dqi_maxblimit = 0xffffffffffffffffULL;
+		info->dqi_maxilimit = 0xffffffffffffffffULL;
+	}
+
 	return 0;
 }
 
@@ -97,29 +149,61 @@ static int v2_write_file_info(struct sup
 	return 0;
 }
 
-static void disk2memdqb(struct mem_dqblk *m, struct v2_disk_dqblk *d)
+static void disk2memdqb(struct mem_dqblk *m, union v2_disk_dqblk *d, uint rev)
 {
-	m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit);
-	m->dqb_isoftlimit = le32_to_cpu(d->dqb_isoftlimit);
-	m->dqb_curinodes = le32_to_cpu(d->dqb_curinodes);
-	m->dqb_itime = le64_to_cpu(d->dqb_itime);
-	m->dqb_bhardlimit = le32_to_cpu(d->dqb_bhardlimit);
-	m->dqb_bsoftlimit = le32_to_cpu(d->dqb_bsoftlimit);
-	m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
-	m->dqb_btime = le64_to_cpu(d->dqb_btime);
-}
-
-static void mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id)
-{
-	d->dqb_ihardlimit = cpu_to_le32((u32)m->dqb_ihardlimit);
-	d->dqb_isoftlimit = cpu_to_le32((u32)m->dqb_isoftlimit);
-	d->dqb_curinodes = cpu_to_le32((u32)m->dqb_curinodes);
-	d->dqb_itime = cpu_to_le64(m->dqb_itime);
-	d->dqb_bhardlimit = cpu_to_le32((u32)m->dqb_bhardlimit);
-	d->dqb_bsoftlimit = cpu_to_le32((u32)m->dqb_bsoftlimit);
-	d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
-	d->dqb_btime = cpu_to_le64(m->dqb_btime);
-	d->dqb_id = cpu_to_le32(id);
+	REV_ASSERT(rev);
+
+	if (rev == 0) {
+		struct v2_disk_dqblk_r0 *ddqblk = &d->r0;
+		m->dqb_ihardlimit = le32_to_cpu(ddqblk->dqb_ihardlimit);
+		m->dqb_isoftlimit = le32_to_cpu(ddqblk->dqb_isoftlimit);
+		m->dqb_curinodes = le32_to_cpu(ddqblk->dqb_curinodes);
+		m->dqb_itime = le64_to_cpu(ddqblk->dqb_itime);
+		m->dqb_bhardlimit = le32_to_cpu(ddqblk->dqb_bhardlimit);
+		m->dqb_bsoftlimit = le32_to_cpu(ddqblk->dqb_bsoftlimit);
+		m->dqb_curspace = le64_to_cpu(ddqblk->dqb_curspace);
+		m->dqb_btime = le64_to_cpu(ddqblk->dqb_btime);
+	} else {
+		struct v2_disk_dqblk_r1 *ddqblk = &d->r1;
+		m->dqb_ihardlimit = le64_to_cpu(ddqblk->dqb_ihardlimit);
+		m->dqb_isoftlimit = le64_to_cpu(ddqblk->dqb_isoftlimit);
+		m->dqb_curinodes = le64_to_cpu(ddqblk->dqb_curinodes);
+		m->dqb_itime = le64_to_cpu(ddqblk->dqb_itime);
+		m->dqb_bhardlimit = le64_to_cpu(ddqblk->dqb_bhardlimit);
+		m->dqb_bsoftlimit = le64_to_cpu(ddqblk->dqb_bsoftlimit);
+		m->dqb_curspace = le64_to_cpu(ddqblk->dqb_curspace);
+		m->dqb_btime = le64_to_cpu(ddqblk->dqb_btime);
+	}
+}
+
+static void mem2diskdqb(union v2_disk_dqblk *d, struct mem_dqblk *m,
+			qid_t id, uint rev)
+{
+	REV_ASSERT(rev);
+
+	if (rev == 0) {
+		struct v2_disk_dqblk_r0 *ddqblk = &d->r0;
+		ddqblk->dqb_id = cpu_to_le32(id);
+		ddqblk->dqb_ihardlimit = cpu_to_le32((__u32)m->dqb_ihardlimit);
+		ddqblk->dqb_isoftlimit = cpu_to_le32((__u32)m->dqb_isoftlimit);
+		ddqblk->dqb_curinodes = cpu_to_le32((__u32)m->dqb_curinodes);
+		ddqblk->dqb_itime = cpu_to_le64(m->dqb_itime);
+		ddqblk->dqb_bhardlimit = cpu_to_le32((__u32)m->dqb_bhardlimit);
+		ddqblk->dqb_bsoftlimit = cpu_to_le32((__u32)m->dqb_bsoftlimit);
+		ddqblk->dqb_curspace = cpu_to_le64(m->dqb_curspace);
+		ddqblk->dqb_btime = cpu_to_le64(ddqblk->dqb_btime);
+	} else {
+		struct v2_disk_dqblk_r1 *ddqblk = &d->r1;
+		ddqblk->dqb_id = cpu_to_le32(id);
+		ddqblk->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
+		ddqblk->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
+		ddqblk->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
+		ddqblk->dqb_itime = cpu_to_le64(m->dqb_itime);
+		ddqblk->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit);
+		ddqblk->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit);
+		ddqblk->dqb_curspace = cpu_to_le64(m->dqb_curspace);
+		ddqblk->dqb_btime = cpu_to_le64(ddqblk->dqb_btime);
+	}
 }
 
 static dqbuf_t getdqbuf(void)
@@ -271,10 +355,10 @@ static uint find_free_dqentry(struct dqu
 {
 	struct super_block *sb = dquot->dq_sb;
 	struct mem_dqinfo *info = sb_dqopt(sb)->info+dquot->dq_type;
-	uint blk, i;
+	uint blk, i, rev = info->u.v2_i.dqi_revision;
+	uint dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev);
 	struct v2_disk_dqdbheader *dh;
-	struct v2_disk_dqblk *ddquot;
-	struct v2_disk_dqblk fakedquot;
+	union v2_disk_dqblk *ddquot;
 	dqbuf_t buf;
 
 	*err = 0;
@@ -301,17 +385,18 @@ static uint find_free_dqentry(struct dqu
 		info->u.v2_i.dqi_free_entry = blk;
 		mark_info_dirty(sb, dquot->dq_type);
 	}
-	if (le16_to_cpu(dh->dqdh_entries)+1 >= V2_DQSTRINBLK)	/* Block will be full? */
+	/* Block will be full? */
+	if (le16_to_cpu(dh->dqdh_entries)+1 >= dqstrinblk)
 		if ((*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) {
 			printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
 			goto out_buf;
 		}
 	dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)+1);
-	memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
 	/* Find free structure in block */
-	for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++);
+	for (i = 0; i < dqstrinblk && memcmp(&emptydquot, ddquot, dqblksz);
+	     i++, ddquot++);
 #ifdef __QUOTA_V2_PARANOIA
-	if (i == V2_DQSTRINBLK) {
+	if (i == dqstrinblk) {
 		printk(KERN_ERR "VFS: find_free_dqentry(): Data block full but it shouldn't.\n");
 		*err = -EIO;
 		goto out_buf;
@@ -321,7 +406,8 @@ static uint find_free_dqentry(struct dqu
 		printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota data block %u.\n", blk);
 		goto out_buf;
 	}
-	dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+sizeof(struct v2_disk_dqdbheader)+i*sizeof(struct v2_disk_dqblk);
+	dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+
+			((char *)ddquot - (char *)buf);
 	freedqbuf(buf);
 	return blk;
 out_buf:
@@ -395,7 +481,9 @@ static int v2_write_dquot(struct dquot *
 {
 	int type = dquot->dq_type;
 	ssize_t ret;
-	struct v2_disk_dqblk ddquot, empty;
+	union v2_disk_dqblk ddquot;
+	uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.dqi_revision;
+	uint dqblksz = v2_dqblksz(rev);
 
 	/* dq_off is guarded by dqio_mutex */
 	if (!dquot->dq_off)
@@ -404,18 +492,22 @@ static int v2_write_dquot(struct dquot *
 			return ret;
 		}
 	spin_lock(&dq_data_lock);
-	mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id);
+	mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id, rev);
 	/* Argh... We may need to write structure full of zeroes but that would be
 	 * treated as an empty place by the rest of the code. Format change would
 	 * be definitely cleaner but the problems probably are not worth it */
-	memset(&empty, 0, sizeof(struct v2_disk_dqblk));
-	if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
-		ddquot.dqb_itime = cpu_to_le64(1);
+	if (!memcmp(&emptydquot, &ddquot, dqblksz)) {
+		if (rev == 0)
+			ddquot.r0.dqb_itime = cpu_to_le64(1);
+		else
+			ddquot.r1.dqb_itime = cpu_to_le64(1);
+	}
 	spin_unlock(&dq_data_lock);
 	ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type,
-	      (char *)&ddquot, sizeof(struct v2_disk_dqblk), dquot->dq_off);
-	if (ret != sizeof(struct v2_disk_dqblk)) {
-		printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", dquot->dq_sb->s_id);
+	      (char *)&ddquot, dqblksz, dquot->dq_off);
+	if (ret != dqblksz) {
+		printk(KERN_WARNING "VFS: dquota write failed on dev %s\n",
+			dquot->dq_sb->s_id);
 		if (ret >= 0)
 			ret = -ENOSPC;
 	}
@@ -434,6 +526,7 @@ static int free_dqentry(struct dquot *dq
 	struct v2_disk_dqdbheader *dh;
 	dqbuf_t buf = getdqbuf();
 	int ret = 0;
+	uint rev = sb_dqopt(sb)->info[type].u.v2_i.dqi_revision;
 
 	if (!buf)
 		return -ENOMEM;
@@ -459,8 +552,8 @@ static int free_dqentry(struct dquot *dq
 	}
 	else {
 		memset(buf+(dquot->dq_off & ((1 << V2_DQBLKSIZE_BITS)-1)), 0,
-		  sizeof(struct v2_disk_dqblk));
-		if (le16_to_cpu(dh->dqdh_entries) == V2_DQSTRINBLK-1) {
+		  v2_dqblksz(rev));
+		if (le16_to_cpu(dh->dqdh_entries) == v2_dqstrinblk(rev)-1) {
 			/* Insert will write block itself */
 			if ((ret = insert_free_dqentry(sb, type, buf, blk)) < 0) {
 				printk(KERN_ERR "VFS: Can't insert quota data block (%u) to free entry list.\n", blk);
@@ -532,41 +625,56 @@ static int v2_delete_dquot(struct dquot
 	return remove_tree(dquot, &tmp, 0);
 }
 
+static inline __u32 dqid(union v2_disk_dqblk *ddquot, uint rev)
+{
+	__u32 dq_id;
+
+	REV_ASSERT(rev);
+
+	if (rev == 0)
+		dq_id = le32_to_cpu(ddquot->r0.dqb_id);
+	else
+		dq_id = le32_to_cpu(ddquot->r1.dqb_id);
+
+	return dq_id;
+}
+
 /* Find entry in block */
 static loff_t find_block_dqentry(struct dquot *dquot, uint blk)
 {
 	dqbuf_t buf = getdqbuf();
 	loff_t ret = 0;
 	int i;
-	struct v2_disk_dqblk *ddquot = GETENTRIES(buf);
+	union v2_disk_dqblk *ddquot = GETENTRIES(buf);
+	int type = dquot->dq_type;
+	uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.dqi_revision;
+	uint dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev);
 
 	if (!buf)
 		return -ENOMEM;
-	if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
+
+	ret = read_blk(dquot->dq_sb, type, blk, buf);
+	if (ret < 0) {
 		printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
 		goto out_buf;
 	}
 	if (dquot->dq_id)
-		for (i = 0; i < V2_DQSTRINBLK &&
-		     le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++);
+		for (i = 0; i < dqstrinblk && dqid(ddquot, rev) != dquot->dq_id;
+		     i++, ddquot++);
 	else {	/* ID 0 as a bit more complicated searching... */
-		struct v2_disk_dqblk fakedquot;
-
-		memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
-		for (i = 0; i < V2_DQSTRINBLK; i++)
-			if (!le32_to_cpu(ddquot[i].dqb_id) &&
-			    memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)))
+		for (i = 0; i < dqstrinblk; i++, ddquot++)
+			if (!dqid(ddquot, rev) &&
+			    memcmp(&emptydquot, ddquot, dqblksz))
 				break;
 	}
-	if (i == V2_DQSTRINBLK) {
+	if (i == dqstrinblk) {
 		printk(KERN_ERR "VFS: Quota for id %u referenced "
 		  "but not present.\n", dquot->dq_id);
 		ret = -EIO;
 		goto out_buf;
 	}
 	else
-		ret = (blk << V2_DQBLKSIZE_BITS) + sizeof(struct
-		  v2_disk_dqdbheader) + i * sizeof(struct v2_disk_dqblk);
+		ret = (blk << V2_DQBLKSIZE_BITS)+((char *)ddquot-(char *)buf);
 out_buf:
 	freedqbuf(buf);
 	return ret;
@@ -608,7 +716,7 @@ static int v2_read_dquot(struct dquot *d
 {
 	int type = dquot->dq_type;
 	loff_t offset;
-	struct v2_disk_dqblk ddquot, empty;
+	union v2_disk_dqblk ddquot;
 	int ret = 0;
 
 #ifdef __QUOTA_V2_PARANOIA
@@ -629,25 +737,30 @@ static int v2_read_dquot(struct dquot *d
 		ret = offset;
 	}
 	else {
+		uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.
+			   dqi_revision;
+		uint  dqblksz = v2_dqblksz(rev);
 		dquot->dq_off = offset;
-		if ((ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
-		    (char *)&ddquot, sizeof(struct v2_disk_dqblk), offset))
-		    != sizeof(struct v2_disk_dqblk)) {
+		ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
+					   (char *)&ddquot, dqblksz, offset);
+		if (ret != dqblksz) {
 			if (ret >= 0)
 				ret = -EIO;
 			printk(KERN_ERR "VFS: Error while reading quota "
 			  "structure for id %u.\n", dquot->dq_id);
-			memset(&ddquot, 0, sizeof(struct v2_disk_dqblk));
+			memset(&ddquot, 0, dqblksz);
 		}
 		else {
 			ret = 0;
 			/* We need to escape back all-zero structure */
-			memset(&empty, 0, sizeof(struct v2_disk_dqblk));
-			empty.dqb_itime = cpu_to_le64(1);
-			if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
-				ddquot.dqb_itime = 0;
+			if (!memcmp(&fakedquot[rev], &ddquot, dqblksz)) {
+				if (rev == 0)
+					ddquot.r0.dqb_itime = cpu_to_le64(0);
+				else
+					ddquot.r1.dqb_itime = cpu_to_le64(0);
+			}
 		}
-		disk2memdqb(&dquot->dq_dqb, &ddquot);
+		disk2memdqb(&dquot->dq_dqb, &ddquot, rev);
 		if (!dquot->dq_dqb.dqb_bhardlimit &&
 			!dquot->dq_dqb.dqb_bsoftlimit &&
 			!dquot->dq_dqb.dqb_ihardlimit &&
diff -rNpu linux.2.6.24.3.old/include/linux/dqblk_v2.h linux-2.6.24.3/include/linux/dqblk_v2.h
--- linux.2.6.24.3.old/include/linux/dqblk_v2.h	2008-02-26 03:20:20.000000000 +0300
+++ linux-2.6.24.3/include/linux/dqblk_v2.h	2008-03-13 22:01:48.000000000 +0300
@@ -21,6 +21,7 @@ struct v2_mem_dqinfo {
 	unsigned int dqi_blocks;
 	unsigned int dqi_free_blk;
 	unsigned int dqi_free_entry;
+	unsigned int dqi_revision;
 };
 
 #endif /* _LINUX_DQBLK_V2_H */
diff -rNpu linux.2.6.24.3.old/include/linux/quota.h linux-2.6.24.3/include/linux/quota.h
--- linux.2.6.24.3.old/include/linux/quota.h	2008-03-14 15:34:16.000000000 +0300
+++ linux-2.6.24.3/include/linux/quota.h	2008-03-13 22:01:48.000000000 +0300
@@ -181,12 +181,12 @@ extern spinlock_t dq_data_lock;
  * Data for one user/group kept in memory
  */
 struct mem_dqblk {
-	__u32 dqb_bhardlimit;	/* absolute limit on disk blks alloc */
-	__u32 dqb_bsoftlimit;	/* preferred limit on disk blks */
+	qsize_t dqb_bhardlimit;	/* absolute limit on disk blks alloc */
+	qsize_t dqb_bsoftlimit;	/* preferred limit on disk blks */
 	qsize_t dqb_curspace;	/* current used space */
-	__u32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
-	__u32 dqb_isoftlimit;	/* preferred inode limit */
-	__u32 dqb_curinodes;	/* current # allocated inodes */
+	qsize_t dqb_ihardlimit;	/* absolute limit on allocated inodes */
+	qsize_t dqb_isoftlimit;	/* preferred inode limit */
+	qsize_t dqb_curinodes;	/* current # allocated inodes */
 	time_t dqb_btime;	/* time limit for excessive disk use */
 	time_t dqb_itime;	/* time limit for excessive inode use */
 };
diff -rNpu linux.2.6.24.3.old/include/linux/quotaio_v2.h linux-2.6.24.3/include/linux/quotaio_v2.h
--- linux.2.6.24.3.old/include/linux/quotaio_v2.h	2008-02-26 03:20:20.000000000 +0300
+++ linux-2.6.24.3/include/linux/quotaio_v2.h	2008-03-16 13:46:47.000000000 +0300
@@ -16,28 +16,51 @@
 	0xd9c01927	/* GRPQUOTA */\
 }
 
-#define V2_INITQVERSIONS {\
+#define V2_INITQVERSIONS_R0 {\
 	0,		/* USRQUOTA */\
 	0		/* GRPQUOTA */\
 }
 
+#define V2_INITQVERSIONS_R1 {\
+	1,		/* USRQUOTA */\
+	1		/* GRPQUOTA */\
+}
+
 /*
  * The following structure defines the format of the disk quota file
  * (as it appears on disk) - the file is a radix tree whose leaves point
  * to blocks of these structures.
  */
-struct v2_disk_dqblk {
+struct v2_disk_dqblk_r0 {
 	__le32 dqb_id;		/* id this quota applies to */
 	__le32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
 	__le32 dqb_isoftlimit;	/* preferred inode limit */
 	__le32 dqb_curinodes;	/* current # allocated inodes */
-	__le32 dqb_bhardlimit;	/* absolute limit on disk space (in QUOTABLOCK_SIZE) */
-	__le32 dqb_bsoftlimit;	/* preferred limit on disk space (in QUOTABLOCK_SIZE) */
+	__le32 dqb_bhardlimit;	/* absolute limit on disk space */
+	__le32 dqb_bsoftlimit;	/* preferred limit on disk space */
+	__le64 dqb_curspace;	/* current space occupied (in bytes) */
+	__le64 dqb_btime;	/* time limit for excessive disk use */
+	__le64 dqb_itime;	/* time limit for excessive inode use */
+};
+
+struct v2_disk_dqblk_r1 {
+	__le32 dqb_id;		/* id this quota applies to */
+	__le32 dqb_padding;	/* padding field */
+	__le64 dqb_ihardlimit;	/* absolute limit on allocated inodes */
+	__le64 dqb_isoftlimit;	/* preferred inode limit */
+	__le64 dqb_curinodes;	/* current # allocated inodes */
+	__le64 dqb_bhardlimit;	/* absolute limit on disk space */
+	__le64 dqb_bsoftlimit;	/* preferred limit on disk space */
 	__le64 dqb_curspace;	/* current space occupied (in bytes) */
 	__le64 dqb_btime;	/* time limit for excessive disk use */
 	__le64 dqb_itime;	/* time limit for excessive inode use */
 };
 
+union v2_disk_dqblk {
+	struct v2_disk_dqblk_r0 r0;
+	struct v2_disk_dqblk_r1 r1;
+};
+
 /*
  * Here are header structures as written on disk and their in-memory copies
  */
@@ -59,7 +82,7 @@ struct v2_disk_dqinfo {
 
 /*
  *  Structure of header of block with quota structures. It is padded to 16 bytes so
- *  there will be space for exactly 21 quota-entries in a block
+ *  there will be space for exactly 21 (r0) or 14 (r1) quota-entries in a block
  */
 struct v2_disk_dqdbheader {
 	__le32 dqdh_next_free;	/* Number of next block with free entry */
@@ -74,6 +97,5 @@ struct v2_disk_dqdbheader {
 #define V2_DQBLKSIZE	(1 << V2_DQBLKSIZE_BITS)	/* Size of block with quota structures */
 #define V2_DQTREEOFF	1		/* Offset of tree in file in blocks */
 #define V2_DQTREEDEPTH	4		/* Depth of quota tree */
-#define V2_DQSTRINBLK	((V2_DQBLKSIZE - sizeof(struct v2_disk_dqdbheader)) / sizeof(struct v2_disk_dqblk))	/* Number of entries in one blocks */
 
 #endif /* _LINUX_QUOTAIO_V2_H */
 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] quota: 64-bit limits with vfs, updated
  2008-03-16 11:21                         ` Andrew Perepechko
@ 2008-03-17 14:35                           ` Jan Kara
  2008-03-20 21:37                             ` Andrew Perepechko
  0 siblings, 1 reply; 21+ messages in thread
From: Jan Kara @ 2008-03-17 14:35 UTC (permalink / raw)
  To: Andrew Perepechko
  Cc: Andreas Dilger, linux-fsdevel, Johann Lombardi,
	Zhiyong Landen tian, Alex Lyashkov

On Sun 16-03-08 14:21:17, Andrew Perepechko wrote:
> Introduce 64-bit quota limits support with QFMT_VFS_V0. 
> 
> This patch is incremental to quota-do-not-allow-setting-of-quota-limits-to-too-high-values.patch included in -mm tree.
> 
> Signed-off-by: Andrew Perepechko <andrew.perepechko@sun.com>
  Nice, I like the patch. Do you have patches for quota tools so that we
can also create files in the format with the new revision? I ask mainly
because I'd like to test it a bit before submitting ;).

								Honza

> 
> ---
> 
>  fs/quota_v2.c              |  263 ++++++++++++++++++++++++++++++++-------------
>  include/linux/dqblk_v2.h   |    1
>  include/linux/quota.h      |   10 -
>  include/linux/quotaio_v2.h |   34 ++++-
>  4 files changed, 222 insertions(+), 86 deletions(-)
> 
> diff -rNpu linux.2.6.24.3.old/fs/quota_v2.c linux-2.6.24.3/fs/quota_v2.c
> --- linux.2.6.24.3.old/fs/quota_v2.c	2008-03-14 15:34:16.000000000 +0300
> +++ linux-2.6.24.3/fs/quota_v2.c	2008-03-16 13:46:01.000000000 +0300
> @@ -23,26 +23,64 @@ MODULE_LICENSE("GPL");
>  typedef char *dqbuf_t;
>  
>  #define GETIDINDEX(id, depth) (((id) >> ((V2_DQTREEDEPTH-(depth)-1)*8)) & 0xff)
> -#define GETENTRIES(buf) ((struct v2_disk_dqblk *)(((char *)buf)+sizeof(struct v2_disk_dqdbheader)))
> +#define GETENTRIES(buf) ((union v2_disk_dqblk *)(((char *)buf) + \
> +			 sizeof(struct v2_disk_dqdbheader)))
> +#define REV_ASSERT(r) BUG_ON((rev) != 0 && (rev) != 1)
>  
> -/* Check whether given file is really vfsv0 quotafile */
> -static int v2_check_quota_file(struct super_block *sb, int type)
> +static const union v2_disk_dqblk emptydquot;
> +static const union v2_disk_dqblk fakedquot[2] = {
> +	{.r0 = {.dqb_itime = __constant_cpu_to_le64(1LLU)} },
> +	{.r1 = {.dqb_itime = __constant_cpu_to_le64(1LLU)} }
> +};
> +
> +static inline uint v2_dqblksz(uint rev)
> +{
> +	uint sz;
> +
> +	REV_ASSERT(rev);
> +
> +	if (rev == 0)
> +		sz = sizeof(struct v2_disk_dqblk_r0);
> +	else
> +		sz = sizeof(struct v2_disk_dqblk_r1);
> +
> +	return sz;
> +}
> +
> +/* Number of quota entries in a block */
> +static inline int v2_dqstrinblk(uint rev)
> +{
> +	return (V2_DQBLKSIZE-sizeof(struct v2_disk_dqdbheader))/v2_dqblksz(rev);
> +}
> +
> +/* Get revision of a quota file, -1 if it does not look like a quota file */
> +static int v2_quota_file_revision(struct super_block *sb, int type)
>  {
>  	struct v2_disk_dqheader dqhead;
>  	ssize_t size;
>  	static const uint quota_magics[] = V2_INITQMAGICS;
> -	static const uint quota_versions[] = V2_INITQVERSIONS;
> +	static const uint quota_versions_r0[] = V2_INITQVERSIONS_R0;
> +	static const uint quota_versions_r1[] = V2_INITQVERSIONS_R1;
>   
>  	size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
>  	if (size != sizeof(struct v2_disk_dqheader)) {
>  		printk("quota_v2: failed read expected=%zd got=%zd\n",
>  			sizeof(struct v2_disk_dqheader), size);
> -		return 0;
> +		return -1;
>  	}
> -	if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
> -	    le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
> -		return 0;
> -	return 1;
> +	if (le32_to_cpu(dqhead.dqh_magic) == quota_magics[type]) {
> +		if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r0[type])
> +			return 0;
> +		if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r1[type])
> +			return 1;
> +	}
> +	return -1;
> +}
> +
> +/* Check whether given file is really vfsv0 quotafile */
> +static inline int v2_check_quota_file(struct super_block *sb, int type)
> +{
> +	return v2_quota_file_revision(sb, type) != -1;
>  }
>  
>  /* Read information header from quota file */
> @@ -51,6 +89,13 @@ static int v2_read_file_info(struct supe
>  	struct v2_disk_dqinfo dinfo;
>  	struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
>  	ssize_t size;
> +	int rev;
> +
> +	rev = v2_quota_file_revision(sb, type);
> +	if (rev < 0) {
> +		printk(KERN_WARNING "Second quota file check failed.\n");
> +		return -1;
> +	}
>  
>  	size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
>  	       sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
> @@ -59,15 +104,22 @@ static int v2_read_file_info(struct supe
>  			sb->s_id);
>  		return -1;
>  	}
> -	/* limits are stored as unsigned 32-bit data */
> -	info->dqi_maxblimit = 0xffffffff;
> -	info->dqi_maxilimit = 0xffffffff;
>  	info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
>  	info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
>  	info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
>  	info->u.v2_i.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
>  	info->u.v2_i.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
>  	info->u.v2_i.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
> +
> +	info->u.v2_i.dqi_revision = rev;
> +	if (rev == 0) {
> +		info->dqi_maxblimit = 0xffffffffULL;
> +		info->dqi_maxilimit = 0xffffffffULL;
> +	} else {
> +		info->dqi_maxblimit = 0xffffffffffffffffULL;
> +		info->dqi_maxilimit = 0xffffffffffffffffULL;
> +	}
> +
>  	return 0;
>  }
>  
> @@ -97,29 +149,61 @@ static int v2_write_file_info(struct sup
>  	return 0;
>  }
>  
> -static void disk2memdqb(struct mem_dqblk *m, struct v2_disk_dqblk *d)
> +static void disk2memdqb(struct mem_dqblk *m, union v2_disk_dqblk *d, uint rev)
>  {
> -	m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit);
> -	m->dqb_isoftlimit = le32_to_cpu(d->dqb_isoftlimit);
> -	m->dqb_curinodes = le32_to_cpu(d->dqb_curinodes);
> -	m->dqb_itime = le64_to_cpu(d->dqb_itime);
> -	m->dqb_bhardlimit = le32_to_cpu(d->dqb_bhardlimit);
> -	m->dqb_bsoftlimit = le32_to_cpu(d->dqb_bsoftlimit);
> -	m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
> -	m->dqb_btime = le64_to_cpu(d->dqb_btime);
> -}
> -
> -static void mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id)
> -{
> -	d->dqb_ihardlimit = cpu_to_le32((u32)m->dqb_ihardlimit);
> -	d->dqb_isoftlimit = cpu_to_le32((u32)m->dqb_isoftlimit);
> -	d->dqb_curinodes = cpu_to_le32((u32)m->dqb_curinodes);
> -	d->dqb_itime = cpu_to_le64(m->dqb_itime);
> -	d->dqb_bhardlimit = cpu_to_le32((u32)m->dqb_bhardlimit);
> -	d->dqb_bsoftlimit = cpu_to_le32((u32)m->dqb_bsoftlimit);
> -	d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
> -	d->dqb_btime = cpu_to_le64(m->dqb_btime);
> -	d->dqb_id = cpu_to_le32(id);
> +	REV_ASSERT(rev);
> +
> +	if (rev == 0) {
> +		struct v2_disk_dqblk_r0 *ddqblk = &d->r0;
> +		m->dqb_ihardlimit = le32_to_cpu(ddqblk->dqb_ihardlimit);
> +		m->dqb_isoftlimit = le32_to_cpu(ddqblk->dqb_isoftlimit);
> +		m->dqb_curinodes = le32_to_cpu(ddqblk->dqb_curinodes);
> +		m->dqb_itime = le64_to_cpu(ddqblk->dqb_itime);
> +		m->dqb_bhardlimit = le32_to_cpu(ddqblk->dqb_bhardlimit);
> +		m->dqb_bsoftlimit = le32_to_cpu(ddqblk->dqb_bsoftlimit);
> +		m->dqb_curspace = le64_to_cpu(ddqblk->dqb_curspace);
> +		m->dqb_btime = le64_to_cpu(ddqblk->dqb_btime);
> +	} else {
> +		struct v2_disk_dqblk_r1 *ddqblk = &d->r1;
> +		m->dqb_ihardlimit = le64_to_cpu(ddqblk->dqb_ihardlimit);
> +		m->dqb_isoftlimit = le64_to_cpu(ddqblk->dqb_isoftlimit);
> +		m->dqb_curinodes = le64_to_cpu(ddqblk->dqb_curinodes);
> +		m->dqb_itime = le64_to_cpu(ddqblk->dqb_itime);
> +		m->dqb_bhardlimit = le64_to_cpu(ddqblk->dqb_bhardlimit);
> +		m->dqb_bsoftlimit = le64_to_cpu(ddqblk->dqb_bsoftlimit);
> +		m->dqb_curspace = le64_to_cpu(ddqblk->dqb_curspace);
> +		m->dqb_btime = le64_to_cpu(ddqblk->dqb_btime);
> +	}
> +}
> +
> +static void mem2diskdqb(union v2_disk_dqblk *d, struct mem_dqblk *m,
> +			qid_t id, uint rev)
> +{
> +	REV_ASSERT(rev);
> +
> +	if (rev == 0) {
> +		struct v2_disk_dqblk_r0 *ddqblk = &d->r0;
> +		ddqblk->dqb_id = cpu_to_le32(id);
> +		ddqblk->dqb_ihardlimit = cpu_to_le32((__u32)m->dqb_ihardlimit);
> +		ddqblk->dqb_isoftlimit = cpu_to_le32((__u32)m->dqb_isoftlimit);
> +		ddqblk->dqb_curinodes = cpu_to_le32((__u32)m->dqb_curinodes);
> +		ddqblk->dqb_itime = cpu_to_le64(m->dqb_itime);
> +		ddqblk->dqb_bhardlimit = cpu_to_le32((__u32)m->dqb_bhardlimit);
> +		ddqblk->dqb_bsoftlimit = cpu_to_le32((__u32)m->dqb_bsoftlimit);
> +		ddqblk->dqb_curspace = cpu_to_le64(m->dqb_curspace);
> +		ddqblk->dqb_btime = cpu_to_le64(ddqblk->dqb_btime);
> +	} else {
> +		struct v2_disk_dqblk_r1 *ddqblk = &d->r1;
> +		ddqblk->dqb_id = cpu_to_le32(id);
> +		ddqblk->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
> +		ddqblk->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
> +		ddqblk->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
> +		ddqblk->dqb_itime = cpu_to_le64(m->dqb_itime);
> +		ddqblk->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit);
> +		ddqblk->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit);
> +		ddqblk->dqb_curspace = cpu_to_le64(m->dqb_curspace);
> +		ddqblk->dqb_btime = cpu_to_le64(ddqblk->dqb_btime);
> +	}
>  }
>  
>  static dqbuf_t getdqbuf(void)
> @@ -271,10 +355,10 @@ static uint find_free_dqentry(struct dqu
>  {
>  	struct super_block *sb = dquot->dq_sb;
>  	struct mem_dqinfo *info = sb_dqopt(sb)->info+dquot->dq_type;
> -	uint blk, i;
> +	uint blk, i, rev = info->u.v2_i.dqi_revision;
> +	uint dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev);
>  	struct v2_disk_dqdbheader *dh;
> -	struct v2_disk_dqblk *ddquot;
> -	struct v2_disk_dqblk fakedquot;
> +	union v2_disk_dqblk *ddquot;
>  	dqbuf_t buf;
>  
>  	*err = 0;
> @@ -301,17 +385,18 @@ static uint find_free_dqentry(struct dqu
>  		info->u.v2_i.dqi_free_entry = blk;
>  		mark_info_dirty(sb, dquot->dq_type);
>  	}
> -	if (le16_to_cpu(dh->dqdh_entries)+1 >= V2_DQSTRINBLK)	/* Block will be full? */
> +	/* Block will be full? */
> +	if (le16_to_cpu(dh->dqdh_entries)+1 >= dqstrinblk)
>  		if ((*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) {
>  			printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
>  			goto out_buf;
>  		}
>  	dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)+1);
> -	memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
>  	/* Find free structure in block */
> -	for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++);
> +	for (i = 0; i < dqstrinblk && memcmp(&emptydquot, ddquot, dqblksz);
> +	     i++, ddquot++);
>  #ifdef __QUOTA_V2_PARANOIA
> -	if (i == V2_DQSTRINBLK) {
> +	if (i == dqstrinblk) {
>  		printk(KERN_ERR "VFS: find_free_dqentry(): Data block full but it shouldn't.\n");
>  		*err = -EIO;
>  		goto out_buf;
> @@ -321,7 +406,8 @@ static uint find_free_dqentry(struct dqu
>  		printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota data block %u.\n", blk);
>  		goto out_buf;
>  	}
> -	dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+sizeof(struct v2_disk_dqdbheader)+i*sizeof(struct v2_disk_dqblk);
> +	dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+
> +			((char *)ddquot - (char *)buf);
>  	freedqbuf(buf);
>  	return blk;
>  out_buf:
> @@ -395,7 +481,9 @@ static int v2_write_dquot(struct dquot *
>  {
>  	int type = dquot->dq_type;
>  	ssize_t ret;
> -	struct v2_disk_dqblk ddquot, empty;
> +	union v2_disk_dqblk ddquot;
> +	uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.dqi_revision;
> +	uint dqblksz = v2_dqblksz(rev);
>  
>  	/* dq_off is guarded by dqio_mutex */
>  	if (!dquot->dq_off)
> @@ -404,18 +492,22 @@ static int v2_write_dquot(struct dquot *
>  			return ret;
>  		}
>  	spin_lock(&dq_data_lock);
> -	mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id);
> +	mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id, rev);
>  	/* Argh... We may need to write structure full of zeroes but that would be
>  	 * treated as an empty place by the rest of the code. Format change would
>  	 * be definitely cleaner but the problems probably are not worth it */
> -	memset(&empty, 0, sizeof(struct v2_disk_dqblk));
> -	if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
> -		ddquot.dqb_itime = cpu_to_le64(1);
> +	if (!memcmp(&emptydquot, &ddquot, dqblksz)) {
> +		if (rev == 0)
> +			ddquot.r0.dqb_itime = cpu_to_le64(1);
> +		else
> +			ddquot.r1.dqb_itime = cpu_to_le64(1);
> +	}
>  	spin_unlock(&dq_data_lock);
>  	ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type,
> -	      (char *)&ddquot, sizeof(struct v2_disk_dqblk), dquot->dq_off);
> -	if (ret != sizeof(struct v2_disk_dqblk)) {
> -		printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", dquot->dq_sb->s_id);
> +	      (char *)&ddquot, dqblksz, dquot->dq_off);
> +	if (ret != dqblksz) {
> +		printk(KERN_WARNING "VFS: dquota write failed on dev %s\n",
> +			dquot->dq_sb->s_id);
>  		if (ret >= 0)
>  			ret = -ENOSPC;
>  	}
> @@ -434,6 +526,7 @@ static int free_dqentry(struct dquot *dq
>  	struct v2_disk_dqdbheader *dh;
>  	dqbuf_t buf = getdqbuf();
>  	int ret = 0;
> +	uint rev = sb_dqopt(sb)->info[type].u.v2_i.dqi_revision;
>  
>  	if (!buf)
>  		return -ENOMEM;
> @@ -459,8 +552,8 @@ static int free_dqentry(struct dquot *dq
>  	}
>  	else {
>  		memset(buf+(dquot->dq_off & ((1 << V2_DQBLKSIZE_BITS)-1)), 0,
> -		  sizeof(struct v2_disk_dqblk));
> -		if (le16_to_cpu(dh->dqdh_entries) == V2_DQSTRINBLK-1) {
> +		  v2_dqblksz(rev));
> +		if (le16_to_cpu(dh->dqdh_entries) == v2_dqstrinblk(rev)-1) {
>  			/* Insert will write block itself */
>  			if ((ret = insert_free_dqentry(sb, type, buf, blk)) < 0) {
>  				printk(KERN_ERR "VFS: Can't insert quota data block (%u) to free entry list.\n", blk);
> @@ -532,41 +625,56 @@ static int v2_delete_dquot(struct dquot
>  	return remove_tree(dquot, &tmp, 0);
>  }
>  
> +static inline __u32 dqid(union v2_disk_dqblk *ddquot, uint rev)
> +{
> +	__u32 dq_id;
> +
> +	REV_ASSERT(rev);
> +
> +	if (rev == 0)
> +		dq_id = le32_to_cpu(ddquot->r0.dqb_id);
> +	else
> +		dq_id = le32_to_cpu(ddquot->r1.dqb_id);
> +
> +	return dq_id;
> +}
> +
>  /* Find entry in block */
>  static loff_t find_block_dqentry(struct dquot *dquot, uint blk)
>  {
>  	dqbuf_t buf = getdqbuf();
>  	loff_t ret = 0;
>  	int i;
> -	struct v2_disk_dqblk *ddquot = GETENTRIES(buf);
> +	union v2_disk_dqblk *ddquot = GETENTRIES(buf);
> +	int type = dquot->dq_type;
> +	uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.dqi_revision;
> +	uint dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev);
>  
>  	if (!buf)
>  		return -ENOMEM;
> -	if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
> +
> +	ret = read_blk(dquot->dq_sb, type, blk, buf);
> +	if (ret < 0) {
>  		printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
>  		goto out_buf;
>  	}
>  	if (dquot->dq_id)
> -		for (i = 0; i < V2_DQSTRINBLK &&
> -		     le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++);
> +		for (i = 0; i < dqstrinblk && dqid(ddquot, rev) != dquot->dq_id;
> +		     i++, ddquot++);
>  	else {	/* ID 0 as a bit more complicated searching... */
> -		struct v2_disk_dqblk fakedquot;
> -
> -		memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
> -		for (i = 0; i < V2_DQSTRINBLK; i++)
> -			if (!le32_to_cpu(ddquot[i].dqb_id) &&
> -			    memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)))
> +		for (i = 0; i < dqstrinblk; i++, ddquot++)
> +			if (!dqid(ddquot, rev) &&
> +			    memcmp(&emptydquot, ddquot, dqblksz))
>  				break;
>  	}
> -	if (i == V2_DQSTRINBLK) {
> +	if (i == dqstrinblk) {
>  		printk(KERN_ERR "VFS: Quota for id %u referenced "
>  		  "but not present.\n", dquot->dq_id);
>  		ret = -EIO;
>  		goto out_buf;
>  	}
>  	else
> -		ret = (blk << V2_DQBLKSIZE_BITS) + sizeof(struct
> -		  v2_disk_dqdbheader) + i * sizeof(struct v2_disk_dqblk);
> +		ret = (blk << V2_DQBLKSIZE_BITS)+((char *)ddquot-(char *)buf);
>  out_buf:
>  	freedqbuf(buf);
>  	return ret;
> @@ -608,7 +716,7 @@ static int v2_read_dquot(struct dquot *d
>  {
>  	int type = dquot->dq_type;
>  	loff_t offset;
> -	struct v2_disk_dqblk ddquot, empty;
> +	union v2_disk_dqblk ddquot;
>  	int ret = 0;
>  
>  #ifdef __QUOTA_V2_PARANOIA
> @@ -629,25 +737,30 @@ static int v2_read_dquot(struct dquot *d
>  		ret = offset;
>  	}
>  	else {
> +		uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.
> +			   dqi_revision;
> +		uint  dqblksz = v2_dqblksz(rev);
>  		dquot->dq_off = offset;
> -		if ((ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
> -		    (char *)&ddquot, sizeof(struct v2_disk_dqblk), offset))
> -		    != sizeof(struct v2_disk_dqblk)) {
> +		ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
> +					   (char *)&ddquot, dqblksz, offset);
> +		if (ret != dqblksz) {
>  			if (ret >= 0)
>  				ret = -EIO;
>  			printk(KERN_ERR "VFS: Error while reading quota "
>  			  "structure for id %u.\n", dquot->dq_id);
> -			memset(&ddquot, 0, sizeof(struct v2_disk_dqblk));
> +			memset(&ddquot, 0, dqblksz);
>  		}
>  		else {
>  			ret = 0;
>  			/* We need to escape back all-zero structure */
> -			memset(&empty, 0, sizeof(struct v2_disk_dqblk));
> -			empty.dqb_itime = cpu_to_le64(1);
> -			if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
> -				ddquot.dqb_itime = 0;
> +			if (!memcmp(&fakedquot[rev], &ddquot, dqblksz)) {
> +				if (rev == 0)
> +					ddquot.r0.dqb_itime = cpu_to_le64(0);
> +				else
> +					ddquot.r1.dqb_itime = cpu_to_le64(0);
> +			}
>  		}
> -		disk2memdqb(&dquot->dq_dqb, &ddquot);
> +		disk2memdqb(&dquot->dq_dqb, &ddquot, rev);
>  		if (!dquot->dq_dqb.dqb_bhardlimit &&
>  			!dquot->dq_dqb.dqb_bsoftlimit &&
>  			!dquot->dq_dqb.dqb_ihardlimit &&
> diff -rNpu linux.2.6.24.3.old/include/linux/dqblk_v2.h linux-2.6.24.3/include/linux/dqblk_v2.h
> --- linux.2.6.24.3.old/include/linux/dqblk_v2.h	2008-02-26 03:20:20.000000000 +0300
> +++ linux-2.6.24.3/include/linux/dqblk_v2.h	2008-03-13 22:01:48.000000000 +0300
> @@ -21,6 +21,7 @@ struct v2_mem_dqinfo {
>  	unsigned int dqi_blocks;
>  	unsigned int dqi_free_blk;
>  	unsigned int dqi_free_entry;
> +	unsigned int dqi_revision;
>  };
>  
>  #endif /* _LINUX_DQBLK_V2_H */
> diff -rNpu linux.2.6.24.3.old/include/linux/quota.h linux-2.6.24.3/include/linux/quota.h
> --- linux.2.6.24.3.old/include/linux/quota.h	2008-03-14 15:34:16.000000000 +0300
> +++ linux-2.6.24.3/include/linux/quota.h	2008-03-13 22:01:48.000000000 +0300
> @@ -181,12 +181,12 @@ extern spinlock_t dq_data_lock;
>   * Data for one user/group kept in memory
>   */
>  struct mem_dqblk {
> -	__u32 dqb_bhardlimit;	/* absolute limit on disk blks alloc */
> -	__u32 dqb_bsoftlimit;	/* preferred limit on disk blks */
> +	qsize_t dqb_bhardlimit;	/* absolute limit on disk blks alloc */
> +	qsize_t dqb_bsoftlimit;	/* preferred limit on disk blks */
>  	qsize_t dqb_curspace;	/* current used space */
> -	__u32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
> -	__u32 dqb_isoftlimit;	/* preferred inode limit */
> -	__u32 dqb_curinodes;	/* current # allocated inodes */
> +	qsize_t dqb_ihardlimit;	/* absolute limit on allocated inodes */
> +	qsize_t dqb_isoftlimit;	/* preferred inode limit */
> +	qsize_t dqb_curinodes;	/* current # allocated inodes */
>  	time_t dqb_btime;	/* time limit for excessive disk use */
>  	time_t dqb_itime;	/* time limit for excessive inode use */
>  };
> diff -rNpu linux.2.6.24.3.old/include/linux/quotaio_v2.h linux-2.6.24.3/include/linux/quotaio_v2.h
> --- linux.2.6.24.3.old/include/linux/quotaio_v2.h	2008-02-26 03:20:20.000000000 +0300
> +++ linux-2.6.24.3/include/linux/quotaio_v2.h	2008-03-16 13:46:47.000000000 +0300
> @@ -16,28 +16,51 @@
>  	0xd9c01927	/* GRPQUOTA */\
>  }
>  
> -#define V2_INITQVERSIONS {\
> +#define V2_INITQVERSIONS_R0 {\
>  	0,		/* USRQUOTA */\
>  	0		/* GRPQUOTA */\
>  }
>  
> +#define V2_INITQVERSIONS_R1 {\
> +	1,		/* USRQUOTA */\
> +	1		/* GRPQUOTA */\
> +}
> +
>  /*
>   * The following structure defines the format of the disk quota file
>   * (as it appears on disk) - the file is a radix tree whose leaves point
>   * to blocks of these structures.
>   */
> -struct v2_disk_dqblk {
> +struct v2_disk_dqblk_r0 {
>  	__le32 dqb_id;		/* id this quota applies to */
>  	__le32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
>  	__le32 dqb_isoftlimit;	/* preferred inode limit */
>  	__le32 dqb_curinodes;	/* current # allocated inodes */
> -	__le32 dqb_bhardlimit;	/* absolute limit on disk space (in QUOTABLOCK_SIZE) */
> -	__le32 dqb_bsoftlimit;	/* preferred limit on disk space (in QUOTABLOCK_SIZE) */
> +	__le32 dqb_bhardlimit;	/* absolute limit on disk space */
> +	__le32 dqb_bsoftlimit;	/* preferred limit on disk space */
> +	__le64 dqb_curspace;	/* current space occupied (in bytes) */
> +	__le64 dqb_btime;	/* time limit for excessive disk use */
> +	__le64 dqb_itime;	/* time limit for excessive inode use */
> +};
> +
> +struct v2_disk_dqblk_r1 {
> +	__le32 dqb_id;		/* id this quota applies to */
> +	__le32 dqb_padding;	/* padding field */
> +	__le64 dqb_ihardlimit;	/* absolute limit on allocated inodes */
> +	__le64 dqb_isoftlimit;	/* preferred inode limit */
> +	__le64 dqb_curinodes;	/* current # allocated inodes */
> +	__le64 dqb_bhardlimit;	/* absolute limit on disk space */
> +	__le64 dqb_bsoftlimit;	/* preferred limit on disk space */
>  	__le64 dqb_curspace;	/* current space occupied (in bytes) */
>  	__le64 dqb_btime;	/* time limit for excessive disk use */
>  	__le64 dqb_itime;	/* time limit for excessive inode use */
>  };
>  
> +union v2_disk_dqblk {
> +	struct v2_disk_dqblk_r0 r0;
> +	struct v2_disk_dqblk_r1 r1;
> +};
> +
>  /*
>   * Here are header structures as written on disk and their in-memory copies
>   */
> @@ -59,7 +82,7 @@ struct v2_disk_dqinfo {
>  
>  /*
>   *  Structure of header of block with quota structures. It is padded to 16 bytes so
> - *  there will be space for exactly 21 quota-entries in a block
> + *  there will be space for exactly 21 (r0) or 14 (r1) quota-entries in a block
>   */
>  struct v2_disk_dqdbheader {
>  	__le32 dqdh_next_free;	/* Number of next block with free entry */
> @@ -74,6 +97,5 @@ struct v2_disk_dqdbheader {
>  #define V2_DQBLKSIZE	(1 << V2_DQBLKSIZE_BITS)	/* Size of block with quota structures */
>  #define V2_DQTREEOFF	1		/* Offset of tree in file in blocks */
>  #define V2_DQTREEDEPTH	4		/* Depth of quota tree */
> -#define V2_DQSTRINBLK	((V2_DQBLKSIZE - sizeof(struct v2_disk_dqdbheader)) / sizeof(struct v2_disk_dqblk))	/* Number of entries in one blocks */
>  
>  #endif /* _LINUX_QUOTAIO_V2_H */
>  
-- 
Jan Kara <jack@suse.cz>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] quota: 64-bit limits with vfs, updated
  2008-03-16  1:14                       ` Andrew Perepechko
  2008-03-16 11:21                         ` Andrew Perepechko
@ 2008-03-17 14:51                         ` Jan Kara
  1 sibling, 0 replies; 21+ messages in thread
From: Jan Kara @ 2008-03-17 14:51 UTC (permalink / raw)
  To: Andrew Perepechko
  Cc: Andreas Dilger, linux-fsdevel, Johann Lombardi,
	Zhiyong Landen tian, Alex Lyashkov

  Hi,

On Sun 16-03-08 04:14:58, Andrew Perepechko wrote:
> > >  struct mem_dqblk {
> > > -	__u32 dqb_bhardlimit;	/* absolute limit on disk blks alloc */
> > > -	__u32 dqb_bsoftlimit;	/* preferred limit on disk blks */
> > > +	qsize_t dqb_bhardlimit;	/* absolute limit on disk blks alloc */
> > > +	qsize_t dqb_bsoftlimit;	/* preferred limit on disk blks */
> > >  	qsize_t dqb_curspace;	/* current used space */
> > > -	__u32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
> > > -	__u32 dqb_isoftlimit;	/* preferred inode limit */
> > > -	__u32 dqb_curinodes;	/* current # allocated inodes */
> > > +	qsize_t dqb_ihardlimit;	/* absolute limit on allocated inodes */
> > > +	qsize_t dqb_isoftlimit;	/* preferred inode limit */
> > > +	qsize_t dqb_curinodes;	/* current # allocated inodes */
> > >  	time_t dqb_btime;	/* time limit for excessive disk use */
> > >  	time_t dqb_itime;	/* time limit for excessive inode use */
> > >  };
> > 
> > Is there a reason to use "qsize_t" instead of just using __u64 directly?
> > 
> 
> Well, I'm not sure if there's a special meaning for qsize_t. I have only followed
> the dqb_curspace convention, which is of qsize_t type. While I see no
> special usage for qsize_t (well, it looks nicer than __u64 :), at the same time I
> see no problems using qsize_t. Maybe Jan will tell us more about qsize_t.
  Well, when designing these structures originally, I wanted to have one
clear type for holding quota space information and so I introduced qsize_t.
Speaking about it, logically we should have a similar type for "number of
inodes" (something like qcount_t) because there is no logical connection between
number of inodes and space... But I don't really care much.

									Honza
-- 
Jan Kara <jack@suse.cz>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] quota: 64-bit limits with vfs, updated
  2008-03-17 14:35                           ` Jan Kara
@ 2008-03-20 21:37                             ` Andrew Perepechko
  2008-03-21  1:04                               ` Andreas Dilger
  0 siblings, 1 reply; 21+ messages in thread
From: Andrew Perepechko @ 2008-03-20 21:37 UTC (permalink / raw)
  To: Jan Kara; +Cc: linux-fsdevel, Johann Lombardi, Zhiyong Landen tian,
	Alex Lyashkov

[-- Attachment #1: Type: text/plain, Size: 21074 bytes --]

On Monday 17 March 2008 17:35:24 Jan Kara wrote:
>   Nice, I like the patch. Do you have patches for quota tools so that we
> can also create files in the format with the new revision? I ask mainly
> because I'd like to test it a bit before submitting ;).
> 
> 								Honza

Hello, Jan.

I made some updates to the patch (see below) as well as some checks
(see attachment).

I'm working on patches for quota tools but I suppose my checks
prove that the thing works in general, so it can be pushed ahead. ;)

Signed-off-by: Andrew Perepechko <andrew.perepechko@sun.com>

---

 fs/quota_v2.c              |  264 ++++++++++++++++++++++++++++++++-------------
 include/linux/dqblk_v2.h   |    1
 include/linux/quota.h      |   10 -
 include/linux/quotaio_v2.h |   34 ++++-
 4 files changed, 223 insertions(+), 86 deletions(-)

diff -rNpu linux-2.6.24.3.quota64/fs/quota_v2.c linux-2.6.24.3/fs/quota_v2.c
--- linux-2.6.24.3.quota64/fs/quota_v2.c	2008-03-21 00:09:33.000000000 +0300
+++ linux-2.6.24.3/fs/quota_v2.c	2008-03-21 00:13:34.000000000 +0300
@@ -23,26 +23,64 @@ MODULE_LICENSE("GPL");
 typedef char *dqbuf_t;
 
 #define GETIDINDEX(id, depth) (((id) >> ((V2_DQTREEDEPTH-(depth)-1)*8)) & 0xff)
-#define GETENTRIES(buf) ((struct v2_disk_dqblk *)(((char *)buf)+sizeof(struct v2_disk_dqdbheader)))
+#define GETENTRIES(buf) ((union v2_disk_dqblk *)(((char *)buf) + \
+			 sizeof(struct v2_disk_dqdbheader)))
+#define REV_ASSERT(r) BUG_ON((rev) != 0 && (rev) != 1)
 
-/* Check whether given file is really vfsv0 quotafile */
-static int v2_check_quota_file(struct super_block *sb, int type)
+static const union v2_disk_dqblk emptydquot;
+static const union v2_disk_dqblk fakedquot[2] = {
+	{.r0 = {.dqb_itime = __constant_cpu_to_le64(1LLU)} },
+	{.r1 = {.dqb_itime = __constant_cpu_to_le64(1LLU)} }
+};
+
+static inline uint v2_dqblksz(uint rev)
+{
+	uint sz;
+
+	REV_ASSERT(rev);
+
+	if (rev == 0)
+		sz = sizeof(struct v2_disk_dqblk_r0);
+	else
+		sz = sizeof(struct v2_disk_dqblk_r1);
+
+	return sz;
+}
+
+/* Number of quota entries in a block */
+static inline int v2_dqstrinblk(uint rev)
+{
+	return (V2_DQBLKSIZE-sizeof(struct v2_disk_dqdbheader))/v2_dqblksz(rev);
+}
+
+/* Get revision of a quota file, -1 if it does not look like a quota file */
+static int v2_quota_file_revision(struct super_block *sb, int type)
 {
 	struct v2_disk_dqheader dqhead;
 	ssize_t size;
 	static const uint quota_magics[] = V2_INITQMAGICS;
-	static const uint quota_versions[] = V2_INITQVERSIONS;
+	static const uint quota_versions_r0[] = V2_INITQVERSIONS_R0;
+	static const uint quota_versions_r1[] = V2_INITQVERSIONS_R1;
  
 	size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
 	if (size != sizeof(struct v2_disk_dqheader)) {
 		printk("quota_v2: failed read expected=%zd got=%zd\n",
 			sizeof(struct v2_disk_dqheader), size);
-		return 0;
+		return -1;
 	}
-	if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
-	    le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
-		return 0;
-	return 1;
+	if (le32_to_cpu(dqhead.dqh_magic) == quota_magics[type]) {
+		if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r0[type])
+			return 0;
+		if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r1[type])
+			return 1;
+	}
+	return -1;
+}
+
+/* Check whether given file is really vfsv0 quotafile */
+static inline int v2_check_quota_file(struct super_block *sb, int type)
+{
+	return v2_quota_file_revision(sb, type) != -1;
 }
 
 /* Read information header from quota file */
@@ -51,6 +89,13 @@ static int v2_read_file_info(struct supe
 	struct v2_disk_dqinfo dinfo;
 	struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
 	ssize_t size;
+	int rev;
+
+	rev = v2_quota_file_revision(sb, type);
+	if (rev < 0) {
+		printk(KERN_WARNING "Second quota file check failed.\n");
+		return -1;
+	}
 
 	size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
 	       sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
@@ -59,15 +104,22 @@ static int v2_read_file_info(struct supe
 			sb->s_id);
 		return -1;
 	}
-	/* limits are stored as unsigned 32-bit data */
-	info->dqi_maxblimit = 0xffffffff;
-	info->dqi_maxilimit = 0xffffffff;
 	info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
 	info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
 	info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
 	info->u.v2_i.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
 	info->u.v2_i.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
 	info->u.v2_i.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
+
+	info->u.v2_i.dqi_revision = rev;
+	if (rev == 0) {
+		info->dqi_maxblimit = 0xffffffffULL;
+		info->dqi_maxilimit = 0xffffffffULL;
+	} else {
+		info->dqi_maxblimit = 0xffffffffffffffffULL;
+		info->dqi_maxilimit = 0xffffffffffffffffULL;
+	}
+
 	return 0;
 }
 
@@ -97,29 +149,61 @@ static int v2_write_file_info(struct sup
 	return 0;
 }
 
-static void disk2memdqb(struct mem_dqblk *m, struct v2_disk_dqblk *d)
+static void disk2memdqb(struct mem_dqblk *m, union v2_disk_dqblk *d, uint rev)
 {
-	m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit);
-	m->dqb_isoftlimit = le32_to_cpu(d->dqb_isoftlimit);
-	m->dqb_curinodes = le32_to_cpu(d->dqb_curinodes);
-	m->dqb_itime = le64_to_cpu(d->dqb_itime);
-	m->dqb_bhardlimit = le32_to_cpu(d->dqb_bhardlimit);
-	m->dqb_bsoftlimit = le32_to_cpu(d->dqb_bsoftlimit);
-	m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
-	m->dqb_btime = le64_to_cpu(d->dqb_btime);
-}
-
-static void mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id)
-{
-	d->dqb_ihardlimit = cpu_to_le32(m->dqb_ihardlimit);
-	d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit);
-	d->dqb_curinodes = cpu_to_le32(m->dqb_curinodes);
-	d->dqb_itime = cpu_to_le64(m->dqb_itime);
-	d->dqb_bhardlimit = cpu_to_le32(m->dqb_bhardlimit);
-	d->dqb_bsoftlimit = cpu_to_le32(m->dqb_bsoftlimit);
-	d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
-	d->dqb_btime = cpu_to_le64(m->dqb_btime);
-	d->dqb_id = cpu_to_le32(id);
+	REV_ASSERT(rev);
+
+	if (rev == 0) {
+		struct v2_disk_dqblk_r0 *ddqblk = &d->r0;
+		m->dqb_ihardlimit = le32_to_cpu(ddqblk->dqb_ihardlimit);
+		m->dqb_isoftlimit = le32_to_cpu(ddqblk->dqb_isoftlimit);
+		m->dqb_curinodes = le32_to_cpu(ddqblk->dqb_curinodes);
+		m->dqb_itime = le64_to_cpu(ddqblk->dqb_itime);
+		m->dqb_bhardlimit = le32_to_cpu(ddqblk->dqb_bhardlimit);
+		m->dqb_bsoftlimit = le32_to_cpu(ddqblk->dqb_bsoftlimit);
+		m->dqb_curspace = le64_to_cpu(ddqblk->dqb_curspace);
+		m->dqb_btime = le64_to_cpu(ddqblk->dqb_btime);
+	} else {
+		struct v2_disk_dqblk_r1 *ddqblk = &d->r1;
+		m->dqb_ihardlimit = le64_to_cpu(ddqblk->dqb_ihardlimit);
+		m->dqb_isoftlimit = le64_to_cpu(ddqblk->dqb_isoftlimit);
+		m->dqb_curinodes = le64_to_cpu(ddqblk->dqb_curinodes);
+		m->dqb_itime = le64_to_cpu(ddqblk->dqb_itime);
+		m->dqb_bhardlimit = le64_to_cpu(ddqblk->dqb_bhardlimit);
+		m->dqb_bsoftlimit = le64_to_cpu(ddqblk->dqb_bsoftlimit);
+		m->dqb_curspace = le64_to_cpu(ddqblk->dqb_curspace);
+		m->dqb_btime = le64_to_cpu(ddqblk->dqb_btime);
+	}
+}
+
+static void mem2diskdqb(union v2_disk_dqblk *d, struct mem_dqblk *m,
+			qid_t id, uint rev)
+{
+	REV_ASSERT(rev);
+
+	if (rev == 0) {
+		struct v2_disk_dqblk_r0 *ddqblk = &d->r0;
+		ddqblk->dqb_id = cpu_to_le32(id);
+		ddqblk->dqb_ihardlimit = cpu_to_le32((__u32)m->dqb_ihardlimit);
+		ddqblk->dqb_isoftlimit = cpu_to_le32((__u32)m->dqb_isoftlimit);
+		ddqblk->dqb_curinodes = cpu_to_le32((__u32)m->dqb_curinodes);
+		ddqblk->dqb_itime = cpu_to_le64(m->dqb_itime);
+		ddqblk->dqb_bhardlimit = cpu_to_le32((__u32)m->dqb_bhardlimit);
+		ddqblk->dqb_bsoftlimit = cpu_to_le32((__u32)m->dqb_bsoftlimit);
+		ddqblk->dqb_curspace = cpu_to_le64(m->dqb_curspace);
+		ddqblk->dqb_btime = cpu_to_le64(ddqblk->dqb_btime);
+	} else {
+		struct v2_disk_dqblk_r1 *ddqblk = &d->r1;
+		ddqblk->dqb_id = cpu_to_le32(id);
+		ddqblk->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
+		ddqblk->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
+		ddqblk->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
+		ddqblk->dqb_itime = cpu_to_le64(m->dqb_itime);
+		ddqblk->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit);
+		ddqblk->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit);
+		ddqblk->dqb_curspace = cpu_to_le64(m->dqb_curspace);
+		ddqblk->dqb_btime = cpu_to_le64(ddqblk->dqb_btime);
+	}
 }
 
 static dqbuf_t getdqbuf(void)
@@ -271,10 +355,10 @@ static uint find_free_dqentry(struct dqu
 {
 	struct super_block *sb = dquot->dq_sb;
 	struct mem_dqinfo *info = sb_dqopt(sb)->info+dquot->dq_type;
-	uint blk, i;
+	uint blk, i, rev = info->u.v2_i.dqi_revision;
+	uint dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev);
 	struct v2_disk_dqdbheader *dh;
-	struct v2_disk_dqblk *ddquot;
-	struct v2_disk_dqblk fakedquot;
+	union v2_disk_dqblk *ddquot;
 	dqbuf_t buf;
 
 	*err = 0;
@@ -301,17 +385,18 @@ static uint find_free_dqentry(struct dqu
 		info->u.v2_i.dqi_free_entry = blk;
 		mark_info_dirty(sb, dquot->dq_type);
 	}
-	if (le16_to_cpu(dh->dqdh_entries)+1 >= V2_DQSTRINBLK)	/* Block will be full? */
+	/* Block will be full? */
+	if (le16_to_cpu(dh->dqdh_entries)+1 >= dqstrinblk)
 		if ((*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) {
 			printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
 			goto out_buf;
 		}
 	dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)+1);
-	memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
 	/* Find free structure in block */
-	for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++);
+	for (i = 0; i < dqstrinblk && memcmp(&emptydquot, ddquot, dqblksz);
+	     i++, ddquot = (union v2_disk_dqblk *)((char *)ddquot + dqblksz));
 #ifdef __QUOTA_V2_PARANOIA
-	if (i == V2_DQSTRINBLK) {
+	if (i == dqstrinblk) {
 		printk(KERN_ERR "VFS: find_free_dqentry(): Data block full but it shouldn't.\n");
 		*err = -EIO;
 		goto out_buf;
@@ -321,7 +406,8 @@ static uint find_free_dqentry(struct dqu
 		printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota data block %u.\n", blk);
 		goto out_buf;
 	}
-	dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+sizeof(struct v2_disk_dqdbheader)+i*sizeof(struct v2_disk_dqblk);
+	dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+
+			((char *)ddquot - (char *)buf);
 	freedqbuf(buf);
 	return blk;
 out_buf:
@@ -395,7 +481,9 @@ static int v2_write_dquot(struct dquot *
 {
 	int type = dquot->dq_type;
 	ssize_t ret;
-	struct v2_disk_dqblk ddquot, empty;
+	union v2_disk_dqblk ddquot;
+	uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.dqi_revision;
+	uint dqblksz = v2_dqblksz(rev);
 
 	/* dq_off is guarded by dqio_mutex */
 	if (!dquot->dq_off)
@@ -404,18 +492,22 @@ static int v2_write_dquot(struct dquot *
 			return ret;
 		}
 	spin_lock(&dq_data_lock);
-	mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id);
+	mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id, rev);
 	/* Argh... We may need to write structure full of zeroes but that would be
 	 * treated as an empty place by the rest of the code. Format change would
 	 * be definitely cleaner but the problems probably are not worth it */
-	memset(&empty, 0, sizeof(struct v2_disk_dqblk));
-	if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
-		ddquot.dqb_itime = cpu_to_le64(1);
+	if (!memcmp(&emptydquot, &ddquot, dqblksz)) {
+		if (rev == 0)
+			ddquot.r0.dqb_itime = cpu_to_le64(1);
+		else
+			ddquot.r1.dqb_itime = cpu_to_le64(1);
+	}
 	spin_unlock(&dq_data_lock);
 	ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type,
-	      (char *)&ddquot, sizeof(struct v2_disk_dqblk), dquot->dq_off);
-	if (ret != sizeof(struct v2_disk_dqblk)) {
-		printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", dquot->dq_sb->s_id);
+	      (char *)&ddquot, dqblksz, dquot->dq_off);
+	if (ret != dqblksz) {
+		printk(KERN_WARNING "VFS: dquota write failed on dev %s\n",
+			dquot->dq_sb->s_id);
 		if (ret >= 0)
 			ret = -ENOSPC;
 	}
@@ -434,6 +526,7 @@ static int free_dqentry(struct dquot *dq
 	struct v2_disk_dqdbheader *dh;
 	dqbuf_t buf = getdqbuf();
 	int ret = 0;
+	uint rev = sb_dqopt(sb)->info[type].u.v2_i.dqi_revision;
 
 	if (!buf)
 		return -ENOMEM;
@@ -459,8 +552,8 @@ static int free_dqentry(struct dquot *dq
 	}
 	else {
 		memset(buf+(dquot->dq_off & ((1 << V2_DQBLKSIZE_BITS)-1)), 0,
-		  sizeof(struct v2_disk_dqblk));
-		if (le16_to_cpu(dh->dqdh_entries) == V2_DQSTRINBLK-1) {
+		  v2_dqblksz(rev));
+		if (le16_to_cpu(dh->dqdh_entries) == v2_dqstrinblk(rev)-1) {
 			/* Insert will write block itself */
 			if ((ret = insert_free_dqentry(sb, type, buf, blk)) < 0) {
 				printk(KERN_ERR "VFS: Can't insert quota data block (%u) to free entry list.\n", blk);
@@ -532,41 +625,57 @@ static int v2_delete_dquot(struct dquot
 	return remove_tree(dquot, &tmp, 0);
 }
 
+static inline __u32 dqid(union v2_disk_dqblk *ddquot, uint rev)
+{
+	__u32 dq_id;
+
+	REV_ASSERT(rev);
+
+	if (rev == 0)
+		dq_id = le32_to_cpu(ddquot->r0.dqb_id);
+	else
+		dq_id = le32_to_cpu(ddquot->r1.dqb_id);
+
+	return dq_id;
+}
+
 /* Find entry in block */
 static loff_t find_block_dqentry(struct dquot *dquot, uint blk)
 {
 	dqbuf_t buf = getdqbuf();
 	loff_t ret = 0;
 	int i;
-	struct v2_disk_dqblk *ddquot = GETENTRIES(buf);
+	union v2_disk_dqblk *ddquot = GETENTRIES(buf);
+	int type = dquot->dq_type;
+	uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.dqi_revision;
+	uint dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev);
 
 	if (!buf)
 		return -ENOMEM;
-	if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
+
+	ret = read_blk(dquot->dq_sb, type, blk, buf);
+	if (ret < 0) {
 		printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
 		goto out_buf;
 	}
 	if (dquot->dq_id)
-		for (i = 0; i < V2_DQSTRINBLK &&
-		     le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++);
+		for (i = 0; i < dqstrinblk && dqid(ddquot, rev) != dquot->dq_id;
+		     i++, ddquot = (union v2_disk_dqblk *)((char *)ddquot + dqblksz));
 	else {	/* ID 0 as a bit more complicated searching... */
-		struct v2_disk_dqblk fakedquot;
-
-		memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
-		for (i = 0; i < V2_DQSTRINBLK; i++)
-			if (!le32_to_cpu(ddquot[i].dqb_id) &&
-			    memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)))
+		for (i = 0; i < dqstrinblk; i++, 
+		     ddquot = (union v2_disk_dqblk *)((char *)ddquot+dqblksz))
+			if (!dqid(ddquot, rev) &&
+			    memcmp(&emptydquot, ddquot, dqblksz))
 				break;
 	}
-	if (i == V2_DQSTRINBLK) {
+	if (i == dqstrinblk) {
 		printk(KERN_ERR "VFS: Quota for id %u referenced "
 		  "but not present.\n", dquot->dq_id);
 		ret = -EIO;
 		goto out_buf;
 	}
 	else
-		ret = (blk << V2_DQBLKSIZE_BITS) + sizeof(struct
-		  v2_disk_dqdbheader) + i * sizeof(struct v2_disk_dqblk);
+		ret = (blk << V2_DQBLKSIZE_BITS)+((char *)ddquot-(char *)buf);
 out_buf:
 	freedqbuf(buf);
 	return ret;
@@ -608,7 +717,7 @@ static int v2_read_dquot(struct dquot *d
 {
 	int type = dquot->dq_type;
 	loff_t offset;
-	struct v2_disk_dqblk ddquot, empty;
+	union v2_disk_dqblk ddquot;
 	int ret = 0;
 
 #ifdef __QUOTA_V2_PARANOIA
@@ -629,25 +738,30 @@ static int v2_read_dquot(struct dquot *d
 		ret = offset;
 	}
 	else {
+		uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.
+			   dqi_revision;
+		uint  dqblksz = v2_dqblksz(rev);
 		dquot->dq_off = offset;
-		if ((ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
-		    (char *)&ddquot, sizeof(struct v2_disk_dqblk), offset))
-		    != sizeof(struct v2_disk_dqblk)) {
+		ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
+					   (char *)&ddquot, dqblksz, offset);
+		if (ret != dqblksz) {
 			if (ret >= 0)
 				ret = -EIO;
 			printk(KERN_ERR "VFS: Error while reading quota "
 			  "structure for id %u.\n", dquot->dq_id);
-			memset(&ddquot, 0, sizeof(struct v2_disk_dqblk));
+			memset(&ddquot, 0, dqblksz);
 		}
 		else {
 			ret = 0;
 			/* We need to escape back all-zero structure */
-			memset(&empty, 0, sizeof(struct v2_disk_dqblk));
-			empty.dqb_itime = cpu_to_le64(1);
-			if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
-				ddquot.dqb_itime = 0;
+			if (!memcmp(&fakedquot[rev], &ddquot, dqblksz)) {
+				if (rev == 0)
+					ddquot.r0.dqb_itime = cpu_to_le64(0);
+				else
+					ddquot.r1.dqb_itime = cpu_to_le64(0);
+			}
 		}
-		disk2memdqb(&dquot->dq_dqb, &ddquot);
+		disk2memdqb(&dquot->dq_dqb, &ddquot, rev);
 		if (!dquot->dq_dqb.dqb_bhardlimit &&
 			!dquot->dq_dqb.dqb_bsoftlimit &&
 			!dquot->dq_dqb.dqb_ihardlimit &&
diff -rNpu linux-2.6.24.3.quota64/include/linux/dqblk_v2.h linux-2.6.24.3/include/linux/dqblk_v2.h
--- linux-2.6.24.3.quota64/include/linux/dqblk_v2.h	2008-03-19 17:29:28.000000000 +0300
+++ linux-2.6.24.3/include/linux/dqblk_v2.h	2008-03-21 00:11:32.000000000 +0300
@@ -21,6 +21,7 @@ struct v2_mem_dqinfo {
 	unsigned int dqi_blocks;
 	unsigned int dqi_free_blk;
 	unsigned int dqi_free_entry;
+	unsigned int dqi_revision;
 };
 
 #endif /* _LINUX_DQBLK_V2_H */
diff -rNpu linux-2.6.24.3.quota64/include/linux/quota.h linux-2.6.24.3/include/linux/quota.h
--- linux-2.6.24.3.quota64/include/linux/quota.h	2008-03-21 00:09:33.000000000 +0300
+++ linux-2.6.24.3/include/linux/quota.h	2008-03-21 00:11:32.000000000 +0300
@@ -181,12 +181,12 @@ extern spinlock_t dq_data_lock;
  * Data for one user/group kept in memory
  */
 struct mem_dqblk {
-	__u32 dqb_bhardlimit;	/* absolute limit on disk blks alloc */
-	__u32 dqb_bsoftlimit;	/* preferred limit on disk blks */
+	qsize_t dqb_bhardlimit;	/* absolute limit on disk blks alloc */
+	qsize_t dqb_bsoftlimit;	/* preferred limit on disk blks */
 	qsize_t dqb_curspace;	/* current used space */
-	__u32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
-	__u32 dqb_isoftlimit;	/* preferred inode limit */
-	__u32 dqb_curinodes;	/* current # allocated inodes */
+	qsize_t dqb_ihardlimit;	/* absolute limit on allocated inodes */
+	qsize_t dqb_isoftlimit;	/* preferred inode limit */
+	qsize_t dqb_curinodes;	/* current # allocated inodes */
 	time_t dqb_btime;	/* time limit for excessive disk use */
 	time_t dqb_itime;	/* time limit for excessive inode use */
 };
diff -rNpu linux-2.6.24.3.quota64/include/linux/quotaio_v2.h linux-2.6.24.3/include/linux/quotaio_v2.h
--- linux-2.6.24.3.quota64/include/linux/quotaio_v2.h	2008-03-19 17:29:28.000000000 +0300
+++ linux-2.6.24.3/include/linux/quotaio_v2.h	2008-03-21 00:11:32.000000000 +0300
@@ -16,28 +16,51 @@
 	0xd9c01927	/* GRPQUOTA */\
 }
 
-#define V2_INITQVERSIONS {\
+#define V2_INITQVERSIONS_R0 {\
 	0,		/* USRQUOTA */\
 	0		/* GRPQUOTA */\
 }
 
+#define V2_INITQVERSIONS_R1 {\
+	1,		/* USRQUOTA */\
+	1		/* GRPQUOTA */\
+}
+
 /*
  * The following structure defines the format of the disk quota file
  * (as it appears on disk) - the file is a radix tree whose leaves point
  * to blocks of these structures.
  */
-struct v2_disk_dqblk {
+struct v2_disk_dqblk_r0 {
 	__le32 dqb_id;		/* id this quota applies to */
 	__le32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
 	__le32 dqb_isoftlimit;	/* preferred inode limit */
 	__le32 dqb_curinodes;	/* current # allocated inodes */
-	__le32 dqb_bhardlimit;	/* absolute limit on disk space (in QUOTABLOCK_SIZE) */
-	__le32 dqb_bsoftlimit;	/* preferred limit on disk space (in QUOTABLOCK_SIZE) */
+	__le32 dqb_bhardlimit;	/* absolute limit on disk space */
+	__le32 dqb_bsoftlimit;	/* preferred limit on disk space */
+	__le64 dqb_curspace;	/* current space occupied (in bytes) */
+	__le64 dqb_btime;	/* time limit for excessive disk use */
+	__le64 dqb_itime;	/* time limit for excessive inode use */
+};
+
+struct v2_disk_dqblk_r1 {
+	__le32 dqb_id;		/* id this quota applies to */
+	__le32 dqb_padding;	/* padding field */
+	__le64 dqb_ihardlimit;	/* absolute limit on allocated inodes */
+	__le64 dqb_isoftlimit;	/* preferred inode limit */
+	__le64 dqb_curinodes;	/* current # allocated inodes */
+	__le64 dqb_bhardlimit;	/* absolute limit on disk space */
+	__le64 dqb_bsoftlimit;	/* preferred limit on disk space */
 	__le64 dqb_curspace;	/* current space occupied (in bytes) */
 	__le64 dqb_btime;	/* time limit for excessive disk use */
 	__le64 dqb_itime;	/* time limit for excessive inode use */
 };
 
+union v2_disk_dqblk {
+	struct v2_disk_dqblk_r0 r0;
+	struct v2_disk_dqblk_r1 r1;
+};
+
 /*
  * Here are header structures as written on disk and their in-memory copies
  */
@@ -59,7 +82,7 @@ struct v2_disk_dqinfo {
 
 /*
  *  Structure of header of block with quota structures. It is padded to 16 bytes so
- *  there will be space for exactly 21 quota-entries in a block
+ *  there will be space for exactly 21 (r0) or 14 (r1) quota-entries in a block
  */
 struct v2_disk_dqdbheader {
 	__le32 dqdh_next_free;	/* Number of next block with free entry */
@@ -74,6 +97,5 @@ struct v2_disk_dqdbheader {
 #define V2_DQBLKSIZE	(1 << V2_DQBLKSIZE_BITS)	/* Size of block with quota structures */
 #define V2_DQTREEOFF	1		/* Offset of tree in file in blocks */
 #define V2_DQTREEDEPTH	4		/* Depth of quota tree */
-#define V2_DQSTRINBLK	((V2_DQBLKSIZE - sizeof(struct v2_disk_dqdbheader)) / sizeof(struct v2_disk_dqblk))	/* Number of entries in one blocks */
 
 #endif /* _LINUX_QUOTAIO_V2_H */

[-- Attachment #2: tests.tar.bz2 --]
[-- Type: application/x-tbz, Size: 14718 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] quota: 64-bit limits with vfs, updated
  2008-03-20 21:37                             ` Andrew Perepechko
@ 2008-03-21  1:04                               ` Andreas Dilger
  2008-03-21  9:14                                 ` Andrew Perepechko
  0 siblings, 1 reply; 21+ messages in thread
From: Andreas Dilger @ 2008-03-21  1:04 UTC (permalink / raw)
  To: Andrew Perepechko
  Cc: Jan Kara, linux-fsdevel, Johann Lombardi, Zhiyong Landen tian,
	Alex Lyashkov

On Mar 21, 2008  00:37 +0300, Andrew Perepechko wrote:
> +#define REV_ASSERT(r) BUG_ON((rev) != 0 && (rev) != 1)

Umm, "r" and "rev" are not consistent above...

Since this assertion is only on the in-memory quota structure, it would
probably be better to have something like

#define REV_R0  0x12340000
#define REV_R1  0x12340001

and, when unpacking the on-disk revision number, OR it with REV_OFFSET and

#define REV_ASSERT(revno) BUG_ON((revno) != REV_R0 && (revno) != REV_R1)

That detects the common case of memory being zeroed for some reason.  It
will also easily detect if you aren't properly swabbing the revision
and unmasking the 0x12340000 from the in-memory structure.

Just a thought...  some people may not like this idea, but I dislike
using "0" as a magic number for anything.

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] quota: 64-bit limits with vfs, updated
  2008-03-21  1:04                               ` Andreas Dilger
@ 2008-03-21  9:14                                 ` Andrew Perepechko
  2008-03-21 10:24                                   ` Andrew Perepechko
  0 siblings, 1 reply; 21+ messages in thread
From: Andrew Perepechko @ 2008-03-21  9:14 UTC (permalink / raw)
  To: linux-fsdevel
  Cc: Jan Kara, Johann Lombardi, Zhiyong Landen tian, Alex Lyashkov

Hello, Andreas.
On Friday 21 March 2008 04:04:03 you wrote:
> On Mar 21, 2008  00:37 +0300, Andrew Perepechko wrote:
> > +#define REV_ASSERT(r) BUG_ON((rev) != 0 && (rev) != 1)
> 
> Umm, "r" and "rev" are not consistent above...
> 

Indeed. In the beginning I made this macro without arguments, then
added this argument not too carefully. It should work anyway since
it depends on rev, not r, but this needs to be fixed to make sense.

> Since this assertion is only on the in-memory quota structure, it would
> probably be better to have something like
> 
> #define REV_R0  0x12340000
> #define REV_R1  0x12340001
> 
> and, when unpacking the on-disk revision number, OR it with REV_OFFSET and
> 
> #define REV_ASSERT(revno) BUG_ON((revno) != REV_R0 && (revno) != REV_R1)
> 
> That detects the common case of memory being zeroed for some reason.  It
> will also easily detect if you aren't properly swabbing the revision
> and unmasking the 0x12340000 from the in-memory structure.

I think it is a good idea. We can't do much for swabbing checks, since 0 is used for the old revision
(and we can't change this) and 1 is a good enough swabbing check. But for internal
consistency purposes, the in-memory values 0x12340000 and 0x12340001 look better than 0 and 1.

> 
> Just a thought...  some people may not like this idea, but I dislike
> using "0" as a magic number for anything.
> 
> Cheers, Andreas
> --
> Andreas Dilger
> Sr. Staff Engineer, Lustre Group
> Sun Microsystems of Canada, Inc.
> 
> 

Best.
Andrew.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC] quota: 64-bit limits with vfs, updated
  2008-03-21  9:14                                 ` Andrew Perepechko
@ 2008-03-21 10:24                                   ` Andrew Perepechko
  0 siblings, 0 replies; 21+ messages in thread
From: Andrew Perepechko @ 2008-03-21 10:24 UTC (permalink / raw)
  To: linux-fsdevel
  Cc: Jan Kara, Johann Lombardi, Zhiyong Landen tian, Alex Lyashkov

Updated patch (following Andreas' suggestions: introduced constants for the revisions,
and the macro now honours its parameter)

Signed-off-by: Andrew Perepechko <andrew.perepechko@sun.com>

---

 fs/quota_v2.c              |  268 ++++++++++++++++++++++++++++++++-------------
 include/linux/dqblk_v2.h   |    1
 include/linux/quota.h      |   10 -
 include/linux/quotaio_v2.h |   34 ++++-
 4 files changed, 227 insertions(+), 86 deletions(-)

diff -rNpu linux-2.6.24.3.quota64/fs/quota_v2.c linux-2.6.24.3/fs/quota_v2.c
--- linux-2.6.24.3.quota64/fs/quota_v2.c	2008-03-21 00:09:33.000000000 +0300
+++ linux-2.6.24.3/fs/quota_v2.c	2008-03-21 13:13:03.000000000 +0300
@@ -23,26 +23,67 @@ MODULE_LICENSE("GPL");
 typedef char *dqbuf_t;
 
 #define GETIDINDEX(id, depth) (((id) >> ((V2_DQTREEDEPTH-(depth)-1)*8)) & 0xff)
-#define GETENTRIES(buf) ((struct v2_disk_dqblk *)(((char *)buf)+sizeof(struct v2_disk_dqdbheader)))
+#define GETENTRIES(buf) ((union v2_disk_dqblk *)(((char *)buf) + \
+			 sizeof(struct v2_disk_dqdbheader)))
 
-/* Check whether given file is really vfsv0 quotafile */
-static int v2_check_quota_file(struct super_block *sb, int type)
+#define V2_REV_R0 0x12340000
+#define V2_REV_R1 0x12340001
+#define REV_ASSERT(rev) BUG_ON((rev) != V2_REV_R0 && (rev) != V2_REV_R1)
+
+static const union v2_disk_dqblk emptydquot;
+static const union v2_disk_dqblk fakedquot[2] = {
+	{.r0 = {.dqb_itime = __constant_cpu_to_le64(1LLU)} },
+	{.r1 = {.dqb_itime = __constant_cpu_to_le64(1LLU)} }
+};
+
+static inline uint v2_dqblksz(uint rev)
+{
+	uint sz;
+
+	REV_ASSERT(rev);
+
+	if (rev == V2_REV_R0)
+		sz = sizeof(struct v2_disk_dqblk_r0);
+	else
+		sz = sizeof(struct v2_disk_dqblk_r1);
+
+	return sz;
+}
+
+/* Number of quota entries in a block */
+static inline int v2_dqstrinblk(uint rev)
+{
+	return (V2_DQBLKSIZE-sizeof(struct v2_disk_dqdbheader))/v2_dqblksz(rev);
+}
+
+/* Get revision of a quota file, -1 if it does not look a quota file */
+static int v2_quota_file_revision(struct super_block *sb, int type)
 {
 	struct v2_disk_dqheader dqhead;
 	ssize_t size;
 	static const uint quota_magics[] = V2_INITQMAGICS;
-	static const uint quota_versions[] = V2_INITQVERSIONS;
+	static const uint quota_versions_r0[] = V2_INITQVERSIONS_R0;
+	static const uint quota_versions_r1[] = V2_INITQVERSIONS_R1;
  
 	size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
 	if (size != sizeof(struct v2_disk_dqheader)) {
 		printk("quota_v2: failed read expected=%zd got=%zd\n",
 			sizeof(struct v2_disk_dqheader), size);
-		return 0;
+		return -1;
 	}
-	if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
-	    le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
-		return 0;
-	return 1;
+	if (le32_to_cpu(dqhead.dqh_magic) == quota_magics[type]) {
+		if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r0[type])
+			return V2_REV_R0;
+		if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r1[type])
+			return V2_REV_R1;
+	}
+	return -1;
+}
+
+/* Check whether given file is really vfsv0 quotafile */
+static inline int v2_check_quota_file(struct super_block *sb, int type)
+{
+	return v2_quota_file_revision(sb, type) != -1;
 }
 
 /* Read information header from quota file */
@@ -51,6 +92,13 @@ static int v2_read_file_info(struct supe
 	struct v2_disk_dqinfo dinfo;
 	struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
 	ssize_t size;
+	int rev;
+
+	rev = v2_quota_file_revision(sb, type);
+	if (rev < 0) {
+		printk(KERN_WARNING "Second quota file check failed.\n");
+		return -1;
+	}
 
 	size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
 	       sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
@@ -59,15 +107,22 @@ static int v2_read_file_info(struct supe
 			sb->s_id);
 		return -1;
 	}
-	/* limits are stored as unsigned 32-bit data */
-	info->dqi_maxblimit = 0xffffffff;
-	info->dqi_maxilimit = 0xffffffff;
 	info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
 	info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
 	info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
 	info->u.v2_i.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
 	info->u.v2_i.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
 	info->u.v2_i.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
+
+	info->u.v2_i.dqi_revision = rev;
+	if (rev == V2_REV_R0) {
+		info->dqi_maxblimit = 0xffffffffULL;
+		info->dqi_maxilimit = 0xffffffffULL;
+	} else {
+		info->dqi_maxblimit = 0xffffffffffffffffULL;
+		info->dqi_maxilimit = 0xffffffffffffffffULL;
+	}
+
 	return 0;
 }
 
@@ -97,29 +152,61 @@ static int v2_write_file_info(struct sup
 	return 0;
 }
 
-static void disk2memdqb(struct mem_dqblk *m, struct v2_disk_dqblk *d)
+static void disk2memdqb(struct mem_dqblk *m, union v2_disk_dqblk *d, uint rev)
 {
-	m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit);
-	m->dqb_isoftlimit = le32_to_cpu(d->dqb_isoftlimit);
-	m->dqb_curinodes = le32_to_cpu(d->dqb_curinodes);
-	m->dqb_itime = le64_to_cpu(d->dqb_itime);
-	m->dqb_bhardlimit = le32_to_cpu(d->dqb_bhardlimit);
-	m->dqb_bsoftlimit = le32_to_cpu(d->dqb_bsoftlimit);
-	m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
-	m->dqb_btime = le64_to_cpu(d->dqb_btime);
-}
-
-static void mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id)
-{
-	d->dqb_ihardlimit = cpu_to_le32(m->dqb_ihardlimit);
-	d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit);
-	d->dqb_curinodes = cpu_to_le32(m->dqb_curinodes);
-	d->dqb_itime = cpu_to_le64(m->dqb_itime);
-	d->dqb_bhardlimit = cpu_to_le32(m->dqb_bhardlimit);
-	d->dqb_bsoftlimit = cpu_to_le32(m->dqb_bsoftlimit);
-	d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
-	d->dqb_btime = cpu_to_le64(m->dqb_btime);
-	d->dqb_id = cpu_to_le32(id);
+	REV_ASSERT(rev);
+
+	if (rev == V2_REV_R0) {
+		struct v2_disk_dqblk_r0 *ddqblk = &d->r0;
+		m->dqb_ihardlimit = le32_to_cpu(ddqblk->dqb_ihardlimit);
+		m->dqb_isoftlimit = le32_to_cpu(ddqblk->dqb_isoftlimit);
+		m->dqb_curinodes = le32_to_cpu(ddqblk->dqb_curinodes);
+		m->dqb_itime = le64_to_cpu(ddqblk->dqb_itime);
+		m->dqb_bhardlimit = le32_to_cpu(ddqblk->dqb_bhardlimit);
+		m->dqb_bsoftlimit = le32_to_cpu(ddqblk->dqb_bsoftlimit);
+		m->dqb_curspace = le64_to_cpu(ddqblk->dqb_curspace);
+		m->dqb_btime = le64_to_cpu(ddqblk->dqb_btime);
+	} else {
+		struct v2_disk_dqblk_r1 *ddqblk = &d->r1;
+		m->dqb_ihardlimit = le64_to_cpu(ddqblk->dqb_ihardlimit);
+		m->dqb_isoftlimit = le64_to_cpu(ddqblk->dqb_isoftlimit);
+		m->dqb_curinodes = le64_to_cpu(ddqblk->dqb_curinodes);
+		m->dqb_itime = le64_to_cpu(ddqblk->dqb_itime);
+		m->dqb_bhardlimit = le64_to_cpu(ddqblk->dqb_bhardlimit);
+		m->dqb_bsoftlimit = le64_to_cpu(ddqblk->dqb_bsoftlimit);
+		m->dqb_curspace = le64_to_cpu(ddqblk->dqb_curspace);
+		m->dqb_btime = le64_to_cpu(ddqblk->dqb_btime);
+	}
+}
+
+static void mem2diskdqb(union v2_disk_dqblk *d, struct mem_dqblk *m,
+			qid_t id, uint rev)
+{
+	REV_ASSERT(rev);
+
+	if (rev == V2_REV_R0) {
+		struct v2_disk_dqblk_r0 *ddqblk = &d->r0;
+		ddqblk->dqb_id = cpu_to_le32(id);
+		ddqblk->dqb_ihardlimit = cpu_to_le32((__u32)m->dqb_ihardlimit);
+		ddqblk->dqb_isoftlimit = cpu_to_le32((__u32)m->dqb_isoftlimit);
+		ddqblk->dqb_curinodes = cpu_to_le32((__u32)m->dqb_curinodes);
+		ddqblk->dqb_itime = cpu_to_le64(m->dqb_itime);
+		ddqblk->dqb_bhardlimit = cpu_to_le32((__u32)m->dqb_bhardlimit);
+		ddqblk->dqb_bsoftlimit = cpu_to_le32((__u32)m->dqb_bsoftlimit);
+		ddqblk->dqb_curspace = cpu_to_le64(m->dqb_curspace);
+		ddqblk->dqb_btime = cpu_to_le64(ddqblk->dqb_btime);
+	} else {
+		struct v2_disk_dqblk_r1 *ddqblk = &d->r1;
+		ddqblk->dqb_id = cpu_to_le32(id);
+		ddqblk->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
+		ddqblk->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
+		ddqblk->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
+		ddqblk->dqb_itime = cpu_to_le64(m->dqb_itime);
+		ddqblk->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit);
+		ddqblk->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit);
+		ddqblk->dqb_curspace = cpu_to_le64(m->dqb_curspace);
+		ddqblk->dqb_btime = cpu_to_le64(ddqblk->dqb_btime);
+	}
 }
 
 static dqbuf_t getdqbuf(void)
@@ -271,10 +358,10 @@ static uint find_free_dqentry(struct dqu
 {
 	struct super_block *sb = dquot->dq_sb;
 	struct mem_dqinfo *info = sb_dqopt(sb)->info+dquot->dq_type;
-	uint blk, i;
+	uint blk, i, rev = info->u.v2_i.dqi_revision;
+	uint dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev);
 	struct v2_disk_dqdbheader *dh;
-	struct v2_disk_dqblk *ddquot;
-	struct v2_disk_dqblk fakedquot;
+	union v2_disk_dqblk *ddquot;
 	dqbuf_t buf;
 
 	*err = 0;
@@ -301,17 +388,18 @@ static uint find_free_dqentry(struct dqu
 		info->u.v2_i.dqi_free_entry = blk;
 		mark_info_dirty(sb, dquot->dq_type);
 	}
-	if (le16_to_cpu(dh->dqdh_entries)+1 >= V2_DQSTRINBLK)	/* Block will be full? */
+	/* Block will be full? */
+	if (le16_to_cpu(dh->dqdh_entries)+1 >= dqstrinblk)
 		if ((*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) {
 			printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
 			goto out_buf;
 		}
 	dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)+1);
-	memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
 	/* Find free structure in block */
-	for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++);
+	for (i = 0; i < dqstrinblk && memcmp(&emptydquot, ddquot, dqblksz);
+	     i++, ddquot = (union v2_disk_dqblk *)((char *)ddquot + dqblksz));
 #ifdef __QUOTA_V2_PARANOIA
-	if (i == V2_DQSTRINBLK) {
+	if (i == dqstrinblk) {
 		printk(KERN_ERR "VFS: find_free_dqentry(): Data block full but it shouldn't.\n");
 		*err = -EIO;
 		goto out_buf;
@@ -321,7 +409,8 @@ static uint find_free_dqentry(struct dqu
 		printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota data block %u.\n", blk);
 		goto out_buf;
 	}
-	dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+sizeof(struct v2_disk_dqdbheader)+i*sizeof(struct v2_disk_dqblk);
+	dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+
+			((char *)ddquot - (char *)buf);
 	freedqbuf(buf);
 	return blk;
 out_buf:
@@ -395,7 +484,9 @@ static int v2_write_dquot(struct dquot *
 {
 	int type = dquot->dq_type;
 	ssize_t ret;
-	struct v2_disk_dqblk ddquot, empty;
+	union v2_disk_dqblk ddquot;
+	uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.dqi_revision;
+	uint dqblksz = v2_dqblksz(rev);
 
 	/* dq_off is guarded by dqio_mutex */
 	if (!dquot->dq_off)
@@ -404,18 +495,22 @@ static int v2_write_dquot(struct dquot *
 			return ret;
 		}
 	spin_lock(&dq_data_lock);
-	mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id);
+	mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id, rev);
 	/* Argh... We may need to write structure full of zeroes but that would be
 	 * treated as an empty place by the rest of the code. Format change would
 	 * be definitely cleaner but the problems probably are not worth it */
-	memset(&empty, 0, sizeof(struct v2_disk_dqblk));
-	if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
-		ddquot.dqb_itime = cpu_to_le64(1);
+	if (!memcmp(&emptydquot, &ddquot, dqblksz)) {
+		if (rev == V2_REV_R0)
+			ddquot.r0.dqb_itime = cpu_to_le64(1);
+		else
+			ddquot.r1.dqb_itime = cpu_to_le64(1);
+	}
 	spin_unlock(&dq_data_lock);
 	ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type,
-	      (char *)&ddquot, sizeof(struct v2_disk_dqblk), dquot->dq_off);
-	if (ret != sizeof(struct v2_disk_dqblk)) {
-		printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", dquot->dq_sb->s_id);
+	      (char *)&ddquot, dqblksz, dquot->dq_off);
+	if (ret != dqblksz) {
+		printk(KERN_WARNING "VFS: dquota write failed on dev %s\n",
+			dquot->dq_sb->s_id);
 		if (ret >= 0)
 			ret = -ENOSPC;
 	}
@@ -434,6 +529,7 @@ static int free_dqentry(struct dquot *dq
 	struct v2_disk_dqdbheader *dh;
 	dqbuf_t buf = getdqbuf();
 	int ret = 0;
+	uint rev = sb_dqopt(sb)->info[type].u.v2_i.dqi_revision;
 
 	if (!buf)
 		return -ENOMEM;
@@ -459,8 +555,8 @@ static int free_dqentry(struct dquot *dq
 	}
 	else {
 		memset(buf+(dquot->dq_off & ((1 << V2_DQBLKSIZE_BITS)-1)), 0,
-		  sizeof(struct v2_disk_dqblk));
-		if (le16_to_cpu(dh->dqdh_entries) == V2_DQSTRINBLK-1) {
+		  v2_dqblksz(rev));
+		if (le16_to_cpu(dh->dqdh_entries) == v2_dqstrinblk(rev)-1) {
 			/* Insert will write block itself */
 			if ((ret = insert_free_dqentry(sb, type, buf, blk)) < 0) {
 				printk(KERN_ERR "VFS: Can't insert quota data block (%u) to free entry list.\n", blk);
@@ -532,41 +628,58 @@ static int v2_delete_dquot(struct dquot
 	return remove_tree(dquot, &tmp, 0);
 }
 
+static inline __u32 dqid(union v2_disk_dqblk *ddquot, uint rev)
+{
+	__u32 dq_id;
+
+	REV_ASSERT(rev);
+
+	if (rev == V2_REV_R0)
+		dq_id = le32_to_cpu(ddquot->r0.dqb_id);
+	else
+		dq_id = le32_to_cpu(ddquot->r1.dqb_id);
+
+	return dq_id;
+}
+
 /* Find entry in block */
 static loff_t find_block_dqentry(struct dquot *dquot, uint blk)
 {
 	dqbuf_t buf = getdqbuf();
 	loff_t ret = 0;
 	int i;
-	struct v2_disk_dqblk *ddquot = GETENTRIES(buf);
+	union v2_disk_dqblk *ddquot = GETENTRIES(buf);
+	int type = dquot->dq_type;
+	uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.dqi_revision;
+	uint dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev);
 
 	if (!buf)
 		return -ENOMEM;
-	if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
+
+	ret = read_blk(dquot->dq_sb, type, blk, buf);
+	if (ret < 0) {
 		printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
 		goto out_buf;
 	}
 	if (dquot->dq_id)
-		for (i = 0; i < V2_DQSTRINBLK &&
-		     le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++);
+		for (i = 0; i < dqstrinblk && dqid(ddquot, rev) != dquot->dq_id;
+		     ddquot = (union v2_disk_dqblk *)((char *)ddquot+dqblksz),
+		     i++);
 	else {	/* ID 0 as a bit more complicated searching... */
-		struct v2_disk_dqblk fakedquot;
-
-		memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
-		for (i = 0; i < V2_DQSTRINBLK; i++)
-			if (!le32_to_cpu(ddquot[i].dqb_id) &&
-			    memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)))
+		for (i = 0; i < dqstrinblk; i++,
+		     ddquot = (union v2_disk_dqblk *)((char *)ddquot+dqblksz))
+			if (!dqid(ddquot, rev) &&
+			    memcmp(&emptydquot, ddquot, dqblksz))
 				break;
 	}
-	if (i == V2_DQSTRINBLK) {
+	if (i == dqstrinblk) {
 		printk(KERN_ERR "VFS: Quota for id %u referenced "
 		  "but not present.\n", dquot->dq_id);
 		ret = -EIO;
 		goto out_buf;
 	}
 	else
-		ret = (blk << V2_DQBLKSIZE_BITS) + sizeof(struct
-		  v2_disk_dqdbheader) + i * sizeof(struct v2_disk_dqblk);
+		ret = (blk << V2_DQBLKSIZE_BITS)+((char *)ddquot-(char *)buf);
 out_buf:
 	freedqbuf(buf);
 	return ret;
@@ -608,7 +721,7 @@ static int v2_read_dquot(struct dquot *d
 {
 	int type = dquot->dq_type;
 	loff_t offset;
-	struct v2_disk_dqblk ddquot, empty;
+	union v2_disk_dqblk ddquot;
 	int ret = 0;
 
 #ifdef __QUOTA_V2_PARANOIA
@@ -629,25 +742,30 @@ static int v2_read_dquot(struct dquot *d
 		ret = offset;
 	}
 	else {
+		uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.
+			   dqi_revision;
+		uint  dqblksz = v2_dqblksz(rev);
 		dquot->dq_off = offset;
-		if ((ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
-		    (char *)&ddquot, sizeof(struct v2_disk_dqblk), offset))
-		    != sizeof(struct v2_disk_dqblk)) {
+		ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
+					   (char *)&ddquot, dqblksz, offset);
+		if (ret != dqblksz) {
 			if (ret >= 0)
 				ret = -EIO;
 			printk(KERN_ERR "VFS: Error while reading quota "
 			  "structure for id %u.\n", dquot->dq_id);
-			memset(&ddquot, 0, sizeof(struct v2_disk_dqblk));
+			memset(&ddquot, 0, dqblksz);
 		}
 		else {
 			ret = 0;
 			/* We need to escape back all-zero structure */
-			memset(&empty, 0, sizeof(struct v2_disk_dqblk));
-			empty.dqb_itime = cpu_to_le64(1);
-			if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
-				ddquot.dqb_itime = 0;
+			if (!memcmp(&fakedquot[rev], &ddquot, dqblksz)) {
+				if (rev == V2_REV_R0)
+					ddquot.r0.dqb_itime = cpu_to_le64(0);
+				else
+					ddquot.r1.dqb_itime = cpu_to_le64(0);
+			}
 		}
-		disk2memdqb(&dquot->dq_dqb, &ddquot);
+		disk2memdqb(&dquot->dq_dqb, &ddquot, rev);
 		if (!dquot->dq_dqb.dqb_bhardlimit &&
 			!dquot->dq_dqb.dqb_bsoftlimit &&
 			!dquot->dq_dqb.dqb_ihardlimit &&
diff -rNpu linux-2.6.24.3.quota64/include/linux/dqblk_v2.h linux-2.6.24.3/include/linux/dqblk_v2.h
--- linux-2.6.24.3.quota64/include/linux/dqblk_v2.h	2008-03-19 17:29:28.000000000 +0300
+++ linux-2.6.24.3/include/linux/dqblk_v2.h	2008-03-21 13:13:03.000000000 +0300
@@ -21,6 +21,7 @@ struct v2_mem_dqinfo {
 	unsigned int dqi_blocks;
 	unsigned int dqi_free_blk;
 	unsigned int dqi_free_entry;
+	unsigned int dqi_revision;
 };
 
 #endif /* _LINUX_DQBLK_V2_H */
diff -rNpu linux-2.6.24.3.quota64/include/linux/quota.h linux-2.6.24.3/include/linux/quota.h
--- linux-2.6.24.3.quota64/include/linux/quota.h	2008-03-21 00:09:33.000000000 +0300
+++ linux-2.6.24.3/include/linux/quota.h	2008-03-21 13:13:03.000000000 +0300
@@ -181,12 +181,12 @@ extern spinlock_t dq_data_lock;
  * Data for one user/group kept in memory
  */
 struct mem_dqblk {
-	__u32 dqb_bhardlimit;	/* absolute limit on disk blks alloc */
-	__u32 dqb_bsoftlimit;	/* preferred limit on disk blks */
+	qsize_t dqb_bhardlimit;	/* absolute limit on disk blks alloc */
+	qsize_t dqb_bsoftlimit;	/* preferred limit on disk blks */
 	qsize_t dqb_curspace;	/* current used space */
-	__u32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
-	__u32 dqb_isoftlimit;	/* preferred inode limit */
-	__u32 dqb_curinodes;	/* current # allocated inodes */
+	qsize_t dqb_ihardlimit;	/* absolute limit on allocated inodes */
+	qsize_t dqb_isoftlimit;	/* preferred inode limit */
+	qsize_t dqb_curinodes;	/* current # allocated inodes */
 	time_t dqb_btime;	/* time limit for excessive disk use */
 	time_t dqb_itime;	/* time limit for excessive inode use */
 };
diff -rNpu linux-2.6.24.3.quota64/include/linux/quotaio_v2.h linux-2.6.24.3/include/linux/quotaio_v2.h
--- linux-2.6.24.3.quota64/include/linux/quotaio_v2.h	2008-03-19 17:29:28.000000000 +0300
+++ linux-2.6.24.3/include/linux/quotaio_v2.h	2008-03-21 13:13:03.000000000 +0300
@@ -16,28 +16,51 @@
 	0xd9c01927	/* GRPQUOTA */\
 }
 
-#define V2_INITQVERSIONS {\
+#define V2_INITQVERSIONS_R0 {\
 	0,		/* USRQUOTA */\
 	0		/* GRPQUOTA */\
 }
 
+#define V2_INITQVERSIONS_R1 {\
+	1,		/* USRQUOTA */\
+	1		/* GRPQUOTA */\
+}
+
 /*
  * The following structure defines the format of the disk quota file
  * (as it appears on disk) - the file is a radix tree whose leaves point
  * to blocks of these structures.
  */
-struct v2_disk_dqblk {
+struct v2_disk_dqblk_r0 {
 	__le32 dqb_id;		/* id this quota applies to */
 	__le32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
 	__le32 dqb_isoftlimit;	/* preferred inode limit */
 	__le32 dqb_curinodes;	/* current # allocated inodes */
-	__le32 dqb_bhardlimit;	/* absolute limit on disk space (in QUOTABLOCK_SIZE) */
-	__le32 dqb_bsoftlimit;	/* preferred limit on disk space (in QUOTABLOCK_SIZE) */
+	__le32 dqb_bhardlimit;	/* absolute limit on disk space */
+	__le32 dqb_bsoftlimit;	/* preferred limit on disk space */
+	__le64 dqb_curspace;	/* current space occupied (in bytes) */
+	__le64 dqb_btime;	/* time limit for excessive disk use */
+	__le64 dqb_itime;	/* time limit for excessive inode use */
+};
+
+struct v2_disk_dqblk_r1 {
+	__le32 dqb_id;		/* id this quota applies to */
+	__le32 dqb_padding;	/* padding field */
+	__le64 dqb_ihardlimit;	/* absolute limit on allocated inodes */
+	__le64 dqb_isoftlimit;	/* preferred inode limit */
+	__le64 dqb_curinodes;	/* current # allocated inodes */
+	__le64 dqb_bhardlimit;	/* absolute limit on disk space */
+	__le64 dqb_bsoftlimit;	/* preferred limit on disk space */
 	__le64 dqb_curspace;	/* current space occupied (in bytes) */
 	__le64 dqb_btime;	/* time limit for excessive disk use */
 	__le64 dqb_itime;	/* time limit for excessive inode use */
 };
 
+union v2_disk_dqblk {
+	struct v2_disk_dqblk_r0 r0;
+	struct v2_disk_dqblk_r1 r1;
+};
+
 /*
  * Here are header structures as written on disk and their in-memory copies
  */
@@ -59,7 +82,7 @@ struct v2_disk_dqinfo {
 
 /*
  *  Structure of header of block with quota structures. It is padded to 16 bytes so
- *  there will be space for exactly 21 quota-entries in a block
+ *  there will be space for exactly 21 (r0) or 14 (r1) quota-entries in a block
  */
 struct v2_disk_dqdbheader {
 	__le32 dqdh_next_free;	/* Number of next block with free entry */
@@ -74,6 +97,5 @@ struct v2_disk_dqdbheader {
 #define V2_DQBLKSIZE	(1 << V2_DQBLKSIZE_BITS)	/* Size of block with quota structures */
 #define V2_DQTREEOFF	1		/* Offset of tree in file in blocks */
 #define V2_DQTREEDEPTH	4		/* Depth of quota tree */
-#define V2_DQSTRINBLK	((V2_DQBLKSIZE - sizeof(struct v2_disk_dqdbheader)) / sizeof(struct v2_disk_dqblk))	/* Number of entries in one blocks */
 
 #endif /* _LINUX_QUOTAIO_V2_H */

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2008-03-21 10:21 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-03-06 13:41 [RFC] quota: 64-bit limits with vfs Andrew Perepechko
2008-03-06 14:48 ` Jan Kara
2008-03-09 22:46   ` Andrew Perepechko
2008-03-10 16:26     ` Jan Kara
2008-03-10 17:13       ` Andrew Perepechko
2008-03-10 17:20         ` Jan Kara
2008-03-14 13:08           ` Andrew Perepechko
2008-03-15  4:23             ` Andreas Dilger
2008-03-15 13:24               ` Andrew Perepechko
2008-03-15 13:32                 ` Andrew Perepechko
2008-03-15 14:45                 ` Andreas Dilger
2008-03-15 18:58                   ` [RFC] quota: 64-bit limits with vfs, updated Andrew Perepechko
2008-03-15 22:47                     ` Andreas Dilger
2008-03-16  1:14                       ` Andrew Perepechko
2008-03-16 11:21                         ` Andrew Perepechko
2008-03-17 14:35                           ` Jan Kara
2008-03-20 21:37                             ` Andrew Perepechko
2008-03-21  1:04                               ` Andreas Dilger
2008-03-21  9:14                                 ` Andrew Perepechko
2008-03-21 10:24                                   ` Andrew Perepechko
2008-03-17 14:51                         ` Jan Kara

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).