From: Jan Kara <jack@suse.cz>
To: Andrew Perepechko <Andrew.Perepechko@Sun.COM>
Cc: linux-fsdevel@vger.kernel.org,
Johann Lombardi <Johann.Lombardi@Sun.COM>,
Zhiyong Landen tian <Zhiyong.Tian@Sun.COM>,
Alex Lyashkov <Alexey.Lyashkov@Sun.COM>
Subject: Re: [RFC] quota: 64-bit limits with vfs
Date: Mon, 10 Mar 2008 17:26:09 +0100 [thread overview]
Message-ID: <20080310162609.GA30435@duck.suse.cz> (raw)
In-Reply-To: <200803100146.03671.andrew.perepechko@sun.com>
Hello Andrew,
On Mon 10-03-08 01:46:02, Andrew Perepechko wrote:
> Did you mean something like the following? It seems to be quite a large patch
> as I expected, but it does keep quota_v2.c structure.
Yes, that's exactly what I meant. Thanks for writing this. Below in the
patch are some minor comments. After fixing those I think I can take the
patch.
Honza
> diff -rNpu quota.orig/fs/quota_v2.c quota/fs/quota_v2.c
> --- quota.orig/fs/quota_v2.c 2008-01-25 01:58:37.000000000 +0300
> +++ quota/fs/quota_v2.c 2008-03-10 01:20:28.000000000 +0300
> @@ -23,26 +23,66 @@ MODULE_LICENSE("GPL");
> typedef char *dqbuf_t;
>
> #define GETIDINDEX(id, depth) (((id) >> ((V2_DQTREEDEPTH-(depth)-1)*8)) & 0xff)
> -#define GETENTRIES(buf) ((struct v2_disk_dqblk *)(((char *)buf)+sizeof(struct v2_disk_dqdbheader)))
> +#define GETENTRIES(buf) ((union v2_disk_dqblk *)(((char *)buf) + \
> + sizeof(struct v2_disk_dqdbheader)))
>
> -/* Check whether given file is really vfsv0 quotafile */
> -static int v2_check_quota_file(struct super_block *sb, int type)
> +static inline uint v2_dqblksz(uint rev)
> +{
> + uint sz = 0; /* make the compiler happy */
> +
> + switch (rev) {
> + case 0:
> + sz = sizeof(struct v2_disk_dqblk_r0);
> + break;
> + case 1:
> + sz = sizeof(struct v2_disk_dqblk_r1);
> + break;
> + default:
> + BUG();
> + }
> +
> + return sz;
> +}
> +
> +/* Number of quota entries in a block */
> +static inline int v2_dqstrinblk(uint rev)
> +{
> + return (V2_DQBLKSIZE-sizeof(struct v2_disk_dqdbheader))/v2_dqblksz(rev);
> +}
> +
> +/* Get revision of a quota file, -1 if it does not look a quota file */
> +static int v2_quota_file_revision(struct super_block *sb, int type)
> {
> struct v2_disk_dqheader dqhead;
> ssize_t size;
> static const uint quota_magics[] = V2_INITQMAGICS;
> - static const uint quota_versions[] = V2_INITQVERSIONS;
> + static const uint quota_versions_r0[] = V2_INITQVERSIONS_R0;
> + static const uint quota_versions_r1[] = V2_INITQVERSIONS_R1;
>
> size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
> if (size != sizeof(struct v2_disk_dqheader)) {
> printk("quota_v2: failed read expected=%zd got=%zd\n",
> sizeof(struct v2_disk_dqheader), size);
> - return 0;
> + return -1;
> }
> - if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
> - le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
> - return 0;
> - return 1;
> + if (le32_to_cpu(dqhead.dqh_magic) == quota_magics[type]) {
> + if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r0[type])
> + return 0;
> + if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r1[type])
> + return 1;
> + }
> + return -1;
> +}
> +
> +/* Check whether given file is really vfsv0 quotafile */
> +static int v2_check_quota_file(struct super_block *sb, int type)
> +{
> + return v2_quota_file_revision(sb, type) != -1;
> +}
> +
> +static qsize_t maxlimit(int rev)
> +{
> + return (rev == 0) ? 0xffffffffULL : 0xffffffffffffffffULL;
Hmm, since we use this function just in v2_read_file_info and it doesn't
make much sense to me to combine inode and block limits, I'd just remove
this function and do something like:
if (rev == 0) {
info->dqi_maxbhardlimit = 0xffffffffULL;
...
}
else {
info->dqi_maxbhardlimit = 0xffffffffffffffffULL;
...
}
BTW: It probably doesn't make sense to have different limits for
hardlimit and softlimit, so I'd just use two limits:
maxblimit and maxilimit.
> }
>
> /* Read information header from quota file */
> @@ -51,6 +91,10 @@ static int v2_read_file_info(struct supe
> struct v2_disk_dqinfo dinfo;
> struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
> ssize_t size;
> + int rev;
> +
> + rev = v2_quota_file_revision(sb, type);
> + BUG_ON(rev < 0);
>
> size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
> sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
> @@ -65,6 +109,13 @@ static int v2_read_file_info(struct supe
> info->u.v2_i.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
> info->u.v2_i.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
> info->u.v2_i.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
> +
> + info->u.v2_i.dqi_revision = rev;
> + info->dqi_maxbhardlimit = maxlimit(rev);
> + info->dqi_maxbsoftlimit = maxlimit(rev);
> + info->dqi_maxihardlimit = maxlimit(rev);
> + info->dqi_maxisoftlimit = maxlimit(rev);
> +
Please see above.
> return 0;
> }
>
> @@ -94,29 +145,47 @@ static int v2_write_file_info(struct sup
> return 0;
> }
>
> -static void disk2memdqb(struct mem_dqblk *m, struct v2_disk_dqblk *d)
> -{
> - m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit);
> - m->dqb_isoftlimit = le32_to_cpu(d->dqb_isoftlimit);
> - m->dqb_curinodes = le32_to_cpu(d->dqb_curinodes);
> - m->dqb_itime = le64_to_cpu(d->dqb_itime);
> - m->dqb_bhardlimit = le32_to_cpu(d->dqb_bhardlimit);
> - m->dqb_bsoftlimit = le32_to_cpu(d->dqb_bsoftlimit);
> - m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
> - m->dqb_btime = le64_to_cpu(d->dqb_btime);
> -}
> -
> -static void mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id)
> -{
> - d->dqb_ihardlimit = cpu_to_le32(m->dqb_ihardlimit);
> - d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit);
> - d->dqb_curinodes = cpu_to_le32(m->dqb_curinodes);
> - d->dqb_itime = cpu_to_le64(m->dqb_itime);
> - d->dqb_bhardlimit = cpu_to_le32(m->dqb_bhardlimit);
> - d->dqb_bsoftlimit = cpu_to_le32(m->dqb_bsoftlimit);
> - d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
> - d->dqb_btime = cpu_to_le64(m->dqb_btime);
> - d->dqb_id = cpu_to_le32(id);
> +#define DQ2MQ(v) (sizeof(v) == sizeof(__u64) ? \
> + (qsize_t)le64_to_cpu(v) : \
> + (qsize_t)le32_to_cpu(v))
> +
> +#define MQ2DQ(v, newv) (sizeof(v) == sizeof(__u64) ? \
> + (v = cpu_to_le64((__u64)newv)) : \
> + (v = cpu_to_le32((__u32)newv)))
> +
> +#define DQF_GET(var, rev, field) (rev == 0 ? \
> + DQ2MQ((var)->disk_dqblk_r0.field) : \
> + DQ2MQ((var)->disk_dqblk_r1.field))
> +
> +#define DQF_PUT(var, rev, field, val) (rev == 0 ? \
> + MQ2DQ((var)->disk_dqblk_r0.field, val) : \
> + MQ2DQ((var)->disk_dqblk_r1.field, val))
Actually, these macros will do the right thing for dqb_id only by sheer
luck and they won't work for dqb_curspace, dqb_itime and dqb_btime.
Please just get rid of them, they aren't very nice anyway. In disk2memdqb()
and mem2diskdqb() just do
if (rev == 0) {
conversions..
}
else if (rev == 1) {
conversions..
}
else {
BUG();
}
And for the fields dqb_id and dqb_itime, which we also use in other places,
introduce functions get_dqb_id() and get/set_dqb_itime().
> +
> +void disk2memdqb(struct mem_dqblk *m, union v2_disk_dqblk *d,
> + uint rev)
> +{
> + m->dqb_ihardlimit = DQF_GET(d, rev, dqb_ihardlimit);
> + m->dqb_isoftlimit = DQF_GET(d, rev, dqb_isoftlimit);
> + m->dqb_curinodes = DQF_GET(d, rev, dqb_curinodes);
> + m->dqb_itime = DQF_GET(d, rev, dqb_itime);
> + m->dqb_bhardlimit = DQF_GET(d, rev, dqb_bhardlimit);
> + m->dqb_bsoftlimit = DQF_GET(d, rev, dqb_bsoftlimit);
> + m->dqb_curspace = DQF_GET(d, rev, dqb_curspace);
> + m->dqb_btime = DQF_GET(d, rev, dqb_btime);
> +}
> +
> +static void mem2diskdqb(union v2_disk_dqblk *d, struct mem_dqblk *m,
> + qid_t id, uint rev)
> +{
> + DQF_PUT(d, rev, dqb_ihardlimit, m->dqb_ihardlimit);
> + DQF_PUT(d, rev, dqb_isoftlimit, m->dqb_isoftlimit);
> + DQF_PUT(d, rev, dqb_curinodes, m->dqb_curinodes);
> + DQF_PUT(d, rev, dqb_itime, m->dqb_itime);
> + DQF_PUT(d, rev, dqb_bhardlimit, m->dqb_bhardlimit);
> + DQF_PUT(d, rev, dqb_bsoftlimit, m->dqb_bsoftlimit);
> + DQF_PUT(d, rev, dqb_curspace, m->dqb_curspace);
> + DQF_PUT(d, rev, dqb_btime, m->dqb_btime);
> + DQF_PUT(d, rev, dqb_id, id);
> }
>
> static dqbuf_t getdqbuf(void)
> @@ -268,10 +337,11 @@ static uint find_free_dqentry(struct dqu
> {
> struct super_block *sb = dquot->dq_sb;
> struct mem_dqinfo *info = sb_dqopt(sb)->info+dquot->dq_type;
> - uint blk, i;
> + uint blk, i, rev = info->u.v2_i.dqi_revision,
> + dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev);
^^^
Please just do another uint declaration so that we don't have to wrap
lines.
> struct v2_disk_dqdbheader *dh;
> - struct v2_disk_dqblk *ddquot;
> - struct v2_disk_dqblk fakedquot;
> + union v2_disk_dqblk *ddquot;
> + union v2_disk_dqblk fakedquot;
> dqbuf_t buf;
>
> *err = 0;
> @@ -298,17 +368,19 @@ static uint find_free_dqentry(struct dqu
> info->u.v2_i.dqi_free_entry = blk;
> mark_info_dirty(sb, dquot->dq_type);
> }
> - if (le16_to_cpu(dh->dqdh_entries)+1 >= V2_DQSTRINBLK) /* Block will be full? */
> + /* Block will be full? */
> + if (le16_to_cpu(dh->dqdh_entries)+1 >= dqstrinblk)
> if ((*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) {
> printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
> goto out_buf;
> }
> dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)+1);
> - memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
> + memset(&fakedquot, 0, dqblksz);
> /* Find free structure in block */
> - for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++);
> + for (i = 0; i < dqstrinblk && memcmp(&fakedquot,
> + ddquot+i, dqblksz); i++);
> #ifdef __QUOTA_V2_PARANOIA
> - if (i == V2_DQSTRINBLK) {
> + if (i == dqstrinblk) {
> printk(KERN_ERR "VFS: find_free_dqentry(): Data block full but it shouldn't.\n");
> *err = -EIO;
> goto out_buf;
> @@ -318,7 +390,8 @@ static uint find_free_dqentry(struct dqu
> printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota data block %u.\n", blk);
> goto out_buf;
> }
> - dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+sizeof(struct v2_disk_dqdbheader)+i*sizeof(struct v2_disk_dqblk);
> + dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+
> + sizeof(struct v2_disk_dqdbheader)+i*dqblksz;
> freedqbuf(buf);
> return blk;
> out_buf:
> @@ -392,7 +465,10 @@ static int v2_write_dquot(struct dquot *
> {
> int type = dquot->dq_type;
> ssize_t ret;
> - struct v2_disk_dqblk ddquot, empty;
> + union v2_disk_dqblk ddquot, empty;
> + struct super_block *sb = dquot->dq_sb;
> + uint rev = sb_dqopt(sb)->info[type].u.v2_i.dqi_revision;
> + uint dqblksz = v2_dqblksz(rev);
>
> /* dq_off is guarded by dqio_mutex */
> if (!dquot->dq_off)
> @@ -401,18 +477,19 @@ static int v2_write_dquot(struct dquot *
> return ret;
> }
> spin_lock(&dq_data_lock);
> - mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id);
> + mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id, rev);
> /* Argh... We may need to write structure full of zeroes but that would be
> * treated as an empty place by the rest of the code. Format change would
> * be definitely cleaner but the problems probably are not worth it */
> - memset(&empty, 0, sizeof(struct v2_disk_dqblk));
> - if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
> - ddquot.dqb_itime = cpu_to_le64(1);
> + memset(&empty, 0, dqblksz);
> + if (!memcmp(&empty, &ddquot, dqblksz))
> + DQF_PUT(&ddquot, rev, dqb_itime, 1);
> spin_unlock(&dq_data_lock);
> - ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type,
> - (char *)&ddquot, sizeof(struct v2_disk_dqblk), dquot->dq_off);
> - if (ret != sizeof(struct v2_disk_dqblk)) {
> - printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", dquot->dq_sb->s_id);
> + ret = sb->s_op->quota_write(sb, type,
> + (char *)&ddquot, dqblksz, dquot->dq_off);
> + if (ret != dqblksz) {
> + printk(KERN_WARNING "VFS: dquota write failed on dev %s\n",
> + sb->s_id);
> if (ret >= 0)
> ret = -ENOSPC;
> }
> @@ -431,6 +508,7 @@ static int free_dqentry(struct dquot *dq
> struct v2_disk_dqdbheader *dh;
> dqbuf_t buf = getdqbuf();
> int ret = 0;
> + uint rev = sb_dqopt(sb)->info[type].u.v2_i.dqi_revision;
>
> if (!buf)
> return -ENOMEM;
> @@ -456,8 +534,8 @@ static int free_dqentry(struct dquot *dq
> }
> else {
> memset(buf+(dquot->dq_off & ((1 << V2_DQBLKSIZE_BITS)-1)), 0,
> - sizeof(struct v2_disk_dqblk));
> - if (le16_to_cpu(dh->dqdh_entries) == V2_DQSTRINBLK-1) {
> + v2_dqblksz(rev));
> + if (le16_to_cpu(dh->dqdh_entries) == v2_dqstrinblk(rev)-1) {
> /* Insert will write block itself */
> if ((ret = insert_free_dqentry(sb, type, buf, blk)) < 0) {
> printk(KERN_ERR "VFS: Can't insert quota data block (%u) to free entry list.\n", blk);
> @@ -535,27 +613,33 @@ static loff_t find_block_dqentry(struct
> dqbuf_t buf = getdqbuf();
> loff_t ret = 0;
> int i;
> - struct v2_disk_dqblk *ddquot = GETENTRIES(buf);
> + union v2_disk_dqblk *ddquot = GETENTRIES(buf);
> + struct super_block *sb = dquot->dq_sb;
> + int type = dquot->dq_type;
> + uint rev = sb_dqopt(sb)->info[type].u.v2_i.dqi_revision;
> + uint dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev);
>
> if (!buf)
> return -ENOMEM;
> - if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
> +
> + ret = read_blk(sb, type, blk, buf);
> + if (ret < 0) {
> printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
> goto out_buf;
> }
> if (dquot->dq_id)
> - for (i = 0; i < V2_DQSTRINBLK &&
> - le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++);
> + for (i = 0; i < dqstrinblk &&
> + DQF_GET(ddquot+i, rev, dqb_id) != dquot->dq_id; i++);
> else { /* ID 0 as a bit more complicated searching... */
> - struct v2_disk_dqblk fakedquot;
> + union v2_disk_dqblk fakedquot;
>
> - memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
> - for (i = 0; i < V2_DQSTRINBLK; i++)
> - if (!le32_to_cpu(ddquot[i].dqb_id) &&
> - memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)))
> + memset(&fakedquot, 0, dqblksz);
> + for (i = 0; i < dqstrinblk; i++)
> + if (!DQF_GET(ddquot+i, rev, dqb_id) &&
> + memcmp(&fakedquot, ddquot+i, dqblksz))
> break;
> }
> - if (i == V2_DQSTRINBLK) {
> + if (i == dqstrinblk) {
> printk(KERN_ERR "VFS: Quota for id %u referenced "
> "but not present.\n", dquot->dq_id);
> ret = -EIO;
> @@ -563,7 +647,7 @@ static loff_t find_block_dqentry(struct
> }
> else
> ret = (blk << V2_DQBLKSIZE_BITS) + sizeof(struct
> - v2_disk_dqdbheader) + i * sizeof(struct v2_disk_dqblk);
> + v2_disk_dqdbheader) + i * dqblksz;
> out_buf:
> freedqbuf(buf);
> return ret;
> @@ -605,12 +689,13 @@ static int v2_read_dquot(struct dquot *d
> {
> int type = dquot->dq_type;
> loff_t offset;
> - struct v2_disk_dqblk ddquot, empty;
> + union v2_disk_dqblk ddquot, empty;
> int ret = 0;
> + struct super_block *sb = dquot->dq_sb;
>
> #ifdef __QUOTA_V2_PARANOIA
> /* Invalidated quota? */
> - if (!dquot->dq_sb || !sb_dqopt(dquot->dq_sb)->files[type]) {
> + if (!sb || !sb_dqopt(sb)->files[type]) {
> printk(KERN_ERR "VFS: Quota invalidated while reading!\n");
> return -EIO;
> }
> @@ -626,25 +711,27 @@ static int v2_read_dquot(struct dquot *d
> ret = offset;
> }
> else {
> + uint rev = sb_dqopt(sb)->info[type].u.v2_i.dqi_revision,
> + dqblksz = v2_dqblksz(rev);
> dquot->dq_off = offset;
> - if ((ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
> - (char *)&ddquot, sizeof(struct v2_disk_dqblk), offset))
> - != sizeof(struct v2_disk_dqblk)) {
> + ret = sb->s_op->quota_read(sb, type, (char *)&ddquot,
> + dqblksz, offset);
> + if (ret != dqblksz) {
> if (ret >= 0)
> ret = -EIO;
> printk(KERN_ERR "VFS: Error while reading quota "
> "structure for id %u.\n", dquot->dq_id);
> - memset(&ddquot, 0, sizeof(struct v2_disk_dqblk));
> + memset(&ddquot, 0, dqblksz);
> }
> else {
> ret = 0;
> /* We need to escape back all-zero structure */
> - memset(&empty, 0, sizeof(struct v2_disk_dqblk));
> - empty.dqb_itime = cpu_to_le64(1);
> - if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
> - ddquot.dqb_itime = 0;
> + memset(&empty, 0, dqblksz);
> + DQF_PUT(&empty, rev, dqb_itime, 1);
> + if (!memcmp(&empty, &ddquot, dqblksz))
> + DQF_PUT(&ddquot, rev, dqb_itime, 0);
> }
> - disk2memdqb(&dquot->dq_dqb, &ddquot);
> + disk2memdqb(&dquot->dq_dqb, &ddquot, rev);
> if (!dquot->dq_dqb.dqb_bhardlimit &&
> !dquot->dq_dqb.dqb_bsoftlimit &&
> !dquot->dq_dqb.dqb_ihardlimit &&
> diff -rNpu quota.orig/include/linux/dqblk_v2.h quota/include/linux/dqblk_v2.h
> --- quota.orig/include/linux/dqblk_v2.h 2008-01-25 01:58:37.000000000 +0300
> +++ quota/include/linux/dqblk_v2.h 2008-03-08 22:27:02.000000000 +0300
> @@ -21,6 +21,7 @@ struct v2_mem_dqinfo {
> unsigned int dqi_blocks;
> unsigned int dqi_free_blk;
> unsigned int dqi_free_entry;
> + unsigned int dqi_revision;
> };
>
> #endif /* _LINUX_DQBLK_V2_H */
> diff -rNpu quota.orig/include/linux/quota.h quota/include/linux/quota.h
> --- quota.orig/include/linux/quota.h 2008-01-25 01:58:37.000000000 +0300
> +++ quota/include/linux/quota.h 2008-03-09 22:38:31.000000000 +0300
> @@ -181,12 +181,12 @@ extern spinlock_t dq_data_lock;
> * Data for one user/group kept in memory
> */
> struct mem_dqblk {
> - __u32 dqb_bhardlimit; /* absolute limit on disk blks alloc */
> - __u32 dqb_bsoftlimit; /* preferred limit on disk blks */
> + qsize_t dqb_bhardlimit; /* absolute limit on disk blks alloc */
> + qsize_t dqb_bsoftlimit; /* preferred limit on disk blks */
> qsize_t dqb_curspace; /* current used space */
> - __u32 dqb_ihardlimit; /* absolute limit on allocated inodes */
> - __u32 dqb_isoftlimit; /* preferred inode limit */
> - __u32 dqb_curinodes; /* current # allocated inodes */
> + qsize_t dqb_ihardlimit; /* absolute limit on allocated inodes */
> + qsize_t dqb_isoftlimit; /* preferred inode limit */
> + qsize_t dqb_curinodes; /* current # allocated inodes */
> time_t dqb_btime; /* time limit for excessive disk use */
> time_t dqb_itime; /* time limit for excessive inode use */
> };
> @@ -202,6 +202,10 @@ struct mem_dqinfo {
> unsigned long dqi_flags;
> unsigned int dqi_bgrace;
> unsigned int dqi_igrace;
> + qsize_t dqi_maxbhardlimit;
> + qsize_t dqi_maxbsoftlimit;
> + qsize_t dqi_maxihardlimit;
> + qsize_t dqi_maxisoftlimit;
> union {
> struct v1_mem_dqinfo v1_i;
> struct v2_mem_dqinfo v2_i;
> diff -rNpu quota.orig/include/linux/quotaio_v2.h quota/include/linux/quotaio_v2.h
> --- quota.orig/include/linux/quotaio_v2.h 2008-01-25 01:58:37.000000000 +0300
> +++ quota/include/linux/quotaio_v2.h 2008-03-09 20:34:42.000000000 +0300
> @@ -16,28 +16,51 @@
> 0xd9c01927 /* GRPQUOTA */\
> }
>
> -#define V2_INITQVERSIONS {\
> +#define V2_INITQVERSIONS_R0 {\
> 0, /* USRQUOTA */\
> 0 /* GRPQUOTA */\
> }
>
> +#define V2_INITQVERSIONS_R1 {\
> + 1, /* USRQUOTA */\
> + 1 /* GRPQUOTA */\
> +}
> +
> /*
> * The following structure defines the format of the disk quota file
> * (as it appears on disk) - the file is a radix tree whose leaves point
> * to blocks of these structures.
> */
> -struct v2_disk_dqblk {
> +struct v2_disk_dqblk_r0 {
> __le32 dqb_id; /* id this quota applies to */
> __le32 dqb_ihardlimit; /* absolute limit on allocated inodes */
> __le32 dqb_isoftlimit; /* preferred inode limit */
> __le32 dqb_curinodes; /* current # allocated inodes */
> - __le32 dqb_bhardlimit; /* absolute limit on disk space (in QUOTABLOCK_SIZE) */
> - __le32 dqb_bsoftlimit; /* preferred limit on disk space (in QUOTABLOCK_SIZE) */
> + __le32 dqb_bhardlimit; /* absolute limit on disk space */
> + __le32 dqb_bsoftlimit; /* preferred limit on disk space */
> + __le64 dqb_curspace; /* current space occupied (in bytes) */
> + __le64 dqb_btime; /* time limit for excessive disk use */
> + __le64 dqb_itime; /* time limit for excessive inode use */
> +};
> +
> +struct v2_disk_dqblk_r1 {
> + __le32 dqb_id; /* id this quota applies to */
> + __le32 dqb_padding; /* padding field */
> + __le64 dqb_ihardlimit; /* absolute limit on allocated inodes */
> + __le64 dqb_isoftlimit; /* preferred inode limit */
> + __le64 dqb_curinodes; /* current # allocated inodes */
> + __le64 dqb_bhardlimit; /* absolute limit on disk space */
> + __le64 dqb_bsoftlimit; /* preferred limit on disk space */
> __le64 dqb_curspace; /* current space occupied (in bytes) */
> __le64 dqb_btime; /* time limit for excessive disk use */
> __le64 dqb_itime; /* time limit for excessive inode use */
> };
>
> +union v2_disk_dqblk {
> + struct v2_disk_dqblk_r0 disk_dqblk_r0;
> + struct v2_disk_dqblk_r1 disk_dqblk_r1;
> +};
> +
> /*
> * Here are header structures as written on disk and their in-memory copies
> */
> @@ -59,7 +82,7 @@ struct v2_disk_dqinfo {
>
> /*
> * Structure of header of block with quota structures. It is padded to 16 bytes so
> - * there will be space for exactly 21 quota-entries in a block
> + * there will be space for exactly 21 (r0) or 14 (r1) quota-entries in a block
> */
> struct v2_disk_dqdbheader {
> __le32 dqdh_next_free; /* Number of next block with free entry */
> @@ -74,6 +97,5 @@ struct v2_disk_dqdbheader {
> #define V2_DQBLKSIZE (1 << V2_DQBLKSIZE_BITS) /* Size of block with quota structures */
> #define V2_DQTREEOFF 1 /* Offset of tree in file in blocks */
> #define V2_DQTREEDEPTH 4 /* Depth of quota tree */
> -#define V2_DQSTRINBLK ((V2_DQBLKSIZE - sizeof(struct v2_disk_dqdbheader)) / sizeof(struct v2_disk_dqblk)) /* Number of entries in one blocks */
>
> #endif /* _LINUX_QUOTAIO_V2_H */
>
>
> On Thursday 06 March 2008 17:48:24 Jan Kara wrote:
> > Hello,
> >
> > Sorry for not responding for a few days. I was busy with other things.
> >
> > On Thu 06-03-08 16:41:11, Andrew Perepechko wrote:
> > > We are in need of large (above 4 TB) block quota limits, but it seems like XFS filesystem
> > > (having its own quota implementation) is the only available fs that supports them. Currently
> > > ext3 supports up to 8 TB of data and forthcoming ext4 will support even more.
> > >
> > > Linux kernel has two implementations of quota format modules:
> > > quota_v1 (with QFMT_VFS_OLD id)
> > > quota_v2 (with QFMT_VFS_V0 id)
> > > Either uses 32-bit data types to store quota limits on disk
> > > (see struct v1_disk_dqblk and struct v2_disk_dqblk). Block quota limits
> > > are stored in 1kb units (QUOTABLOCK_SIZE constant) which gives
> > > the largest possible quota limit of (2^32-1)*2^10 bytes ~ 4 TB.
> > >
> > > In-memory quota entries representation suffers from the same 4 TB
> > > limitation (see struct mem_dqblk).
> > >
> > > The patch below adds a separate quota_v3 module which deals with 64-bit data to solve the problem
> > > (another possible approach is to merge the code into quota_v2 module to reuse some amount of the code -
> > > this won't reuse a lot because there're too many references to disk_dqblk structures and dependent constants).
> > >
> > > Could you comment on the patch and the idea behind it in general?
> > Just from a quick look. There seem to be actually two separate changes:
> > 1) Change current formats so that they refuse to set quota above treshold they
> > are able to handle. That's fine a we should do that (in a separate patch,
> > please).
> >
> > 2) Implement new format able to handle more that 4TB limits. In principle,
> > that is fine but vfsv0 format has actually been designed so that similar
> > changes can go mostly invisible for userspace (modulo some tools updates
> > etc.). Given that the format itself doesn't change that much, we definitely
> > do not need to introduce completely new quota format. I'd just increase the
> > version number. Also I'd like to avoid unnecessary code duplication. The
> > only thing that is really different are just the conversion routines from
> > disk to memory. So I'd just modify the code in fs/quota_v2.c so that it
> > supports both versions of the quota format - you need to parametrize macros
> > like GETENTRIES(), V2_DQSTRINBLK, ... (actually make inline functions of
> > them when we are changing it), probably make union of struct v2_disk_dqblk
> > including structures for both versions and change sizeof(struct
> > v2_disk_dqblk) to some function. But all this shouldn't be that hard to do
> > in a nice way...
> >
> > Honza
>
--
Jan Kara <jack@suse.cz>
SUSE Labs, CR
next prev parent reply other threads:[~2008-03-10 16:26 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-03-06 13:41 [RFC] quota: 64-bit limits with vfs Andrew Perepechko
2008-03-06 14:48 ` Jan Kara
2008-03-09 22:46 ` Andrew Perepechko
2008-03-10 16:26 ` Jan Kara [this message]
2008-03-10 17:13 ` Andrew Perepechko
2008-03-10 17:20 ` Jan Kara
2008-03-14 13:08 ` Andrew Perepechko
2008-03-15 4:23 ` Andreas Dilger
2008-03-15 13:24 ` Andrew Perepechko
2008-03-15 13:32 ` Andrew Perepechko
2008-03-15 14:45 ` Andreas Dilger
2008-03-15 18:58 ` [RFC] quota: 64-bit limits with vfs, updated Andrew Perepechko
2008-03-15 22:47 ` Andreas Dilger
2008-03-16 1:14 ` Andrew Perepechko
2008-03-16 11:21 ` Andrew Perepechko
2008-03-17 14:35 ` Jan Kara
2008-03-20 21:37 ` Andrew Perepechko
2008-03-21 1:04 ` Andreas Dilger
2008-03-21 9:14 ` Andrew Perepechko
2008-03-21 10:24 ` Andrew Perepechko
2008-03-17 14:51 ` Jan Kara
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080310162609.GA30435@duck.suse.cz \
--to=jack@suse.cz \
--cc=Alexey.Lyashkov@Sun.COM \
--cc=Andrew.Perepechko@Sun.COM \
--cc=Johann.Lombardi@Sun.COM \
--cc=Zhiyong.Tian@Sun.COM \
--cc=linux-fsdevel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).