From: Theodore Ts'o <tytso@mit.edu>
To: "Darrick J. Wong" <darrick.wong@oracle.com>
Cc: linux-ext4@vger.kernel.org
Subject: Re: [PATCH 50/74] libext2fs: support allocating uninit blocks in bmap2()
Date: Wed, 15 Jan 2014 17:19:51 -0500 [thread overview]
Message-ID: <20140115221951.GA12226@thunk.org> (raw)
In-Reply-To: <20140115211122.GJ9229@birch.djwong.org>
On Wed, Jan 15, 2014 at 01:11:22PM -0800, Darrick J. Wong wrote:
> > The other thing to note about this patch is that if you want to
> > implement fallocate, ext2fs_bmap2() is really the wrong tool to use.
> > I've been working on a program for work which pre-creates a bunch of
>
> I think that ext2fs_fallocate would be a good addition to the library. Is your
> program far enough along to share? fuse2fs would benefit greatly.
An ext2fs_fallocate() is way more difficult than what I've done, since
you have to deal with all sorts of corner cases where the file has
pre-existing sparse extents, which may or may not be initialized, and
make sure that it works in all of those cases. Allocating blocks to a
file which you know started as a zero length file is in fact much easier.
Here are the key bits from my program:
/*
 * Mark (inuse > 0) or release (inuse < 0) the `num' blocks starting at
 * `blk' in fs->block_map, and bring the per-group and superblock free
 * counts in line with that change.  inuse == 0 and num == 0 are no-ops.
 *
 * This should eventually be cleaned up and put into libext2fs.
 * It is much faster than calling ext2fs_block_alloc_stats() for each
 * block, since doing that requires recalculating the bg descriptor
 * checksum for every single block that you allocate; here the checksum
 * is set once per block group touched by the range.
 *
 * The per-group count is adjusted by n / EXT2FS_CLUSTER_RATIO(fs), which
 * truncates.
 * NOTE(review): presumably callers pass cluster-aligned ranges -- confirm.
 */
static void ext2fs_block_alloc_stats_range(ext2_filsys fs, blk64_t blk,
					   blk_t num, int inuse)
{
	/*
	 * A range may legitimately end on the very last block of the
	 * filesystem, so only reject ranges that run *past* the block
	 * count.  The check itself is always performed; only the message
	 * depends on com_err being available.
	 */
	if (blk + num > ext2fs_blocks_count(fs->super)) {
#ifndef OMIT_COM_ERR
		com_err("ext2fs_block_alloc_stats_range", 0,
			"Illegal block range: %llu (%u) ",
			(unsigned long long) blk, num);
#endif
		return;
	}
	if (inuse == 0 || num == 0)
		return;

	if (inuse > 0)
		ext2fs_mark_block_bitmap_range2(fs->block_map, blk, num);
	else
		ext2fs_unmark_block_bitmap_range2(fs->block_map, blk, num);

	while (num) {
		int group = ext2fs_group_of_blk2(fs, blk);
		blk64_t last_blk = ext2fs_group_last_block2(fs, group);
		blk_t n = num;
		blk_t clusters;

		/* Never let one step cross the end of the current group. */
		if (blk + num > last_blk)
			n = last_blk - blk + 1;
		clusters = n / EXT2FS_CLUSTER_RATIO(fs);

		/*
		 * Add or subtract explicitly instead of multiplying by a
		 * signed +1/-1: mixing int with the unsigned blk_t converts
		 * the sign away before the division / 64-bit widening.
		 */
		if (inuse > 0) {
			ext2fs_bg_free_blocks_count_set(fs, group,
				ext2fs_bg_free_blocks_count(fs, group) -
				clusters);
		} else {
			ext2fs_bg_free_blocks_count_set(fs, group,
				ext2fs_bg_free_blocks_count(fs, group) +
				clusters);
		}
		ext2fs_bg_flags_clear(fs, group, EXT2_BG_BLOCK_UNINIT);
		ext2fs_group_desc_csum_set(fs, group);

		if (inuse > 0)
			ext2fs_free_blocks_count_add(fs->super,
						     -(long long) n);
		else
			ext2fs_free_blocks_count_add(fs->super,
						     (long long) n);

		blk += n;
		num -= n;
	}

	/* At least one group was updated above; flag the changes once. */
	ext2fs_mark_super_dirty(fs);
	ext2fs_mark_bb_dirty(fs);
}
/*
 * ext2fs_allocate_tables() does not allocate blocks optimally in every
 * situation, and that needs a proper look at some point.  Until then,
 * use this simple and stupid replacement.
 *
 * Three passes are made over all block groups: the first assigns each
 * group a block bitmap block, the second an inode bitmap block, and the
 * third a run of fs->inode_blocks_per_group blocks for the inode table.
 * Every allocation starts its search at `goal' (defined outside this
 * function) and stores the block it found back into `goal'.
 *
 * Returns 0 on success, or the first error reported by the allocator.
 */
errcode_t my_allocate_tables(ext2_filsys fs)
{
	errcode_t	err;
	dgrp_t		grp;
	blk64_t		last_fs_blk;

	/* Pass 1: one block per group for the block bitmap. */
	grp = 0;
	while (grp < fs->group_desc_count) {
		err = ext2fs_new_block2(fs, goal, NULL, &goal);
		if (err != 0) {
			return err;
		}
		ext2fs_block_alloc_stats2(fs, goal, +1);
		ext2fs_block_bitmap_loc_set(fs, grp, goal);
		grp++;
	}

	/* Pass 2: one block per group for the inode bitmap. */
	grp = 0;
	while (grp < fs->group_desc_count) {
		err = ext2fs_new_block2(fs, goal, NULL, &goal);
		if (err != 0) {
			return err;
		}
		ext2fs_block_alloc_stats2(fs, goal, +1);
		ext2fs_inode_bitmap_loc_set(fs, grp, goal);
		grp++;
	}

	/* Pass 3: a run of free blocks per group for the inode table. */
	last_fs_blk = ext2fs_blocks_count(fs->super) - 1;
	grp = 0;
	while (grp < fs->group_desc_count) {
		err = ext2fs_get_free_blocks2(fs, goal, last_fs_blk,
					      fs->inode_blocks_per_group,
					      fs->block_map, &goal);
		if (err != 0) {
			return err;
		}
		ext2fs_block_alloc_stats_range(fs, goal,
					       fs->inode_blocks_per_group, +1);
		ext2fs_inode_table_loc_set(fs, grp, goal);
		grp++;
	}

	return 0;
}
/*
 * Debugging aid for mk_hugefile(): read inode `ino' back with
 * ext2fs_read_inode() and print its i_size_high and full size, preceded
 * by `prefix'.  Nothing is printed if the inode cannot be read.
 */
static void mk_hugefile_dump_size(ext2_filsys fs, ext2_ino_t ino,
				  const char *prefix)
{
	struct ext2_inode t;

	if (ext2fs_read_inode(fs, ino, &t))
		return;
	/* i_size_high is a 32-bit field; cast so it matches %lu. */
	printf("%si_size_high: %lu size: %llu\n", prefix,
	       (unsigned long) t.i_size_high,
	       (unsigned long long) EXT2_I_SIZE(&t));
}

/*
 * Some of this could eventually get turned into fallocate, but that's
 * actually a much more difficult and tricky thing to implement.
 *
 * Create a new regular file of `num' blocks (size num * fs->blocksize),
 * map it with extents onto free block runs found in fs->block_map, and
 * link it into directory `dir' under the name "hugefile%05d" formatted
 * from `idx'.
 *
 *   fs   - the filesystem to operate on
 *   num  - number of blocks to give the file
 *   dir  - inode number of the directory that receives the link
 *   idx  - index used to build the file name
 *   ino  - out: inode number of the new file
 *
 * The search for free blocks starts at `goal' (defined outside this
 * function), which is advanced past every run that gets used.
 *
 * Returns 0 on success or an error code.  On failure the extent handle
 * is released, but the inode and any blocks already allocated are not
 * given back.
 */
static errcode_t mk_hugefile(ext2_filsys fs, unsigned int num,
			     ext2_ino_t dir, int idx, ext2_ino_t *ino)
{
	errcode_t		retval;
	blk64_t			lblk, bend, last_fs_blk;
	__u64			size;
	struct ext2_inode	inode;
	ext2_extent_handle_t	handle = NULL;
	char			fn[32];

	retval = ext2fs_new_inode(fs, 0, LINUX_S_IFREG, NULL, ino);
	if (retval)
		return retval;

	memset(&inode, 0, sizeof(struct ext2_inode));
	inode.i_mode = LINUX_S_IFREG | 0600;
	ext2fs_iblk_set(fs, &inode, num / EXT2FS_CLUSTER_RATIO(fs));
	size = (__u64) num * fs->blocksize;
	inode.i_size = size & 0xffffffff;
	inode.i_size_high = (size >> 32);
	inode.i_links_count = 1;

	retval = ext2fs_write_new_inode(fs, *ino, &inode);
	if (retval)
		return retval;

	ext2fs_inode_alloc_stats2(fs, *ino, +1, 0);

	retval = ext2fs_extent_open2(fs, *ino, &inode, &handle);
	if (retval)
		return retval;

	mk_hugefile_dump_size(fs, *ino, "eo: ");

	last_fs_blk = ext2fs_blocks_count(fs->super) - 1;
	lblk = 0;
	while (num) {
		blk64_t pblk, start;
		blk_t n = num;

		/* Find where the next run of free blocks begins ... */
		retval = ext2fs_find_first_zero_block_bitmap2(fs->block_map,
					goal, last_fs_blk, &start);
		if (retval) {
			retval = ENOSPC;
			goto errout;
		}
		goal = start;

		/*
		 * ... and where it ends.  ENOENT is reported through the
		 * return value (not through `bend') and means no block is
		 * in use up to the end of the filesystem.
		 */
		retval = ext2fs_find_first_set_block_bitmap2(fs->block_map,
					goal, last_fs_blk, &bend);
		if (retval == ENOENT)
			bend = last_fs_blk + 1;
		else if (retval)
			goto errout;

		if (bend - goal < num)
			n = bend - goal;
		printf("goal %llu bend %llu num %u n %u\n",
		       (unsigned long long) goal,
		       (unsigned long long) bend, num, n);

		pblk = goal;
		num -= n;
		goal += n;
		ext2fs_block_alloc_stats_range(fs, pblk, n, +1);

		/* Map the run, at most EXT_INIT_MAX_LEN blocks per extent. */
		while (n) {
			blk_t l = n;
			struct ext2fs_extent newextent;

			mk_hugefile_dump_size(fs, *ino, "");

			if (l > EXT_INIT_MAX_LEN)
				l = EXT_INIT_MAX_LEN;
			newextent.e_len = l;
			newextent.e_pblk = pblk;
			newextent.e_lblk = lblk;
			newextent.e_flags = 0;
			printf("inserting extent: %llu %llu %u\n",
			       (unsigned long long) lblk,
			       (unsigned long long) pblk, l);
			retval = ext2fs_extent_insert(handle,
					EXT2_EXTENT_INSERT_AFTER, &newextent);
			if (retval)
				goto errout;
			pblk += l;
			lblk += l;
			n -= l;
		}
	}

	mk_hugefile_dump_size(fs, *ino, "");

	snprintf(fn, sizeof(fn), "hugefile%05d", idx);

	/* Link the file, growing the directory as often as it runs full. */
	for (;;) {
		retval = ext2fs_link(fs, dir, fn, *ino, EXT2_FT_REG_FILE);
		if (retval != EXT2_ET_DIR_NO_SPACE)
			break;
		retval = ext2fs_expand_dir(fs, dir);
		if (retval)
			break;
	}

errout:
	if (handle)
		ext2fs_extent_free(handle);
	return retval;
}
Note that this requires some of the test patches I've been sending
out, since it uses ext2fs_find_first_{set,zero}_block_bitmap2().
There are also some bugs in the versions which I sent out; I'm working
on fixing them....
> That said, I've also found a couple of bugs in the extent code by implementing
> fallocate in such a stupid way. :) It turns out that if (a) we need to split
> an extent into three pieces (say we write to a block in the middle of an
> unwritten extent and don't want to convert the whole extent) and (b) either of
> the extent_insert calls requires us to split the extent block and (c) we ENOSPC
> while trying to allocate a new extent block, we don't put the extent tree back
> the way it was before the split, and all the blocks after that point are lost.
Well, I found a bug in ext2fs_extent_insert() which showed up when I
tried to implement the block allocation in an intelligent way. :-)
I'll send out that bug fix in a bit.
> I will send patches to avoid this corruption by checking for enough space soon.
> I think your local git tree has patches in it that aren't on kernel.org yet, so
> I'll hold off until I see them show up.
Yeah, some of those patches still need some clean up, so I haven't
pushed my maint branch to kernel.org yet.
But anyway, the above code will give you an idea where I'm going ---
this is **way** faster than trying to allocate blocks using the
set_bmap() function. :-)
- Ted
next prev parent reply other threads:[~2014-01-15 22:19 UTC|newest]
Thread overview: 150+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-12-11 1:18 [PATCH v3 00/74] e2fsprogs patchbomb 12/2013 Darrick J. Wong
2013-12-11 1:18 ` [PATCH 01/74] libext2fs: don't overflow when punching indirect blocks with large blocks Darrick J. Wong
2013-12-12 17:06 ` Theodore Ts'o
2013-12-11 1:18 ` [PATCH 02/74] libext2fs: fix tests that set LARGE_FILE Darrick J. Wong
2013-12-12 17:09 ` Theodore Ts'o
2013-12-11 1:18 ` [PATCH 03/74] mke2fs: load configfile blocksize setting before 64bit checks Darrick J. Wong
2013-12-12 17:27 ` Theodore Ts'o
2013-12-12 22:28 ` Andreas Dilger
2013-12-12 23:13 ` Darrick J. Wong
2013-12-12 23:14 ` [PATCH] mke2fs: clean up kernel version tests Darrick J. Wong
2013-12-11 1:18 ` [PATCH 04/74] libext2fs: use ext2fs_punch() to truncate quota file Darrick J. Wong
2013-12-12 17:28 ` Theodore Ts'o
2013-12-12 17:36 ` Theodore Ts'o
2013-12-12 20:07 ` Darrick J. Wong
2013-12-12 20:56 ` Theodore Ts'o
2013-12-12 21:10 ` Darrick J. Wong
2013-12-11 1:18 ` [PATCH 05/74] debugfs: fix init_filesys help text Darrick J. Wong
2013-12-12 17:37 ` Theodore Ts'o
2013-12-11 1:18 ` [PATCH 06/74] tune2fs: forbid changing uuid on an uninit_bg filesystem Darrick J. Wong
2013-12-15 2:02 ` Theodore Ts'o
2013-12-11 1:19 ` [PATCH 07/74] libext2fs: tweak inline data error wording Darrick J. Wong
2013-12-13 4:33 ` Theodore Ts'o
2013-12-11 1:19 ` [PATCH 08/74] libext2fs: don't allow ridiculously large logical block numbers Darrick J. Wong
2013-12-12 17:41 ` Theodore Ts'o
2013-12-11 1:19 ` [PATCH 09/74] libext2fs: fix another minor grammatical error in the error catalog Darrick J. Wong
2013-12-12 17:42 ` Theodore Ts'o
2013-12-11 1:19 ` [PATCH 10/74] debugfs: fix various minor bogosity Darrick J. Wong
2013-12-12 17:44 ` Theodore Ts'o
2013-12-11 1:19 ` [PATCH 11/74] misc: use the checksum predicate function, not raw flag tests Darrick J. Wong
2013-12-13 4:34 ` Theodore Ts'o
2013-12-11 1:19 ` [PATCH 12/74] libext2fs: make symlinks safe for 64bit blocks and extents Darrick J. Wong
2013-12-12 17:48 ` Theodore Ts'o
2013-12-11 1:19 ` [PATCH 13/74] debugfs: handle 64bit block numbers Darrick J. Wong
2013-12-12 17:49 ` Theodore Ts'o
2013-12-17 17:01 ` Eric Sandeen
2013-12-11 1:19 ` [PATCH 14/74] libext2fs: fileio should use 64bit io routines Darrick J. Wong
2013-12-12 17:50 ` Theodore Ts'o
2013-12-11 1:20 ` [PATCH 15/74] resize2fs: rewrite extent/dir/ea block checksums when migrating Darrick J. Wong
2013-12-13 4:35 ` Theodore Ts'o
2013-12-11 1:20 ` [PATCH 16/74] debugfs: don't leak fd when calling dump_file Darrick J. Wong
2013-12-12 17:51 ` Theodore Ts'o
2013-12-11 1:20 ` [PATCH 17/74] debugfs: don't leak mmp_s memory Darrick J. Wong
2013-12-12 17:52 ` Theodore Ts'o
2013-12-12 22:33 ` Andreas Dilger
2013-12-12 22:44 ` Darrick J. Wong
2013-12-11 1:20 ` [PATCH 18/74] e2fsck: fix memory leaks Darrick J. Wong
2013-12-12 17:58 ` Theodore Ts'o
2013-12-17 16:12 ` Eric Sandeen
2013-12-11 1:20 ` [PATCH 19/74] misc: don't leak file descriptors Darrick J. Wong
2013-12-12 18:06 ` Theodore Ts'o
2013-12-11 1:20 ` [PATCH 20/74] mke2fs: don't leak memory Darrick J. Wong
2013-12-12 18:07 ` Theodore Ts'o
2013-12-11 1:20 ` [PATCH 21/74] e4defrag: don't crash if umounts the filesystem races with us Darrick J. Wong
2013-12-12 18:08 ` Theodore Ts'o
2013-12-11 1:20 ` [PATCH 22/74] e4defrag: defensively check results of sysconf(_SC_PAGESIZE) Darrick J. Wong
2013-12-12 18:09 ` Theodore Ts'o
2013-12-11 1:20 ` [PATCH 23/74] e2image: check return value from check_if_mounted Darrick J. Wong
2013-12-12 18:09 ` Theodore Ts'o
2013-12-11 1:20 ` [PATCH 24/74] dumpe2fs: check return values Darrick J. Wong
2013-12-12 18:10 ` Theodore Ts'o
2013-12-11 1:21 ` [PATCH 25/74] libss: fix fd error handling Darrick J. Wong
2013-12-12 18:11 ` Theodore Ts'o
2013-12-11 1:21 ` [PATCH 26/74] libss: fix memory handling errors Darrick J. Wong
2013-12-12 18:13 ` Theodore Ts'o
2013-12-17 17:04 ` Eric Sandeen
2013-12-18 22:23 ` Darrick J. Wong
2013-12-11 1:21 ` [PATCH 27/74] libquota: fix memory leak Darrick J. Wong
2013-12-12 18:14 ` Theodore Ts'o
2013-12-11 1:21 ` [PATCH 28/74] libext2fs: check return values Darrick J. Wong
2013-12-12 18:15 ` Theodore Ts'o
2013-12-17 16:57 ` Eric Sandeen
2013-12-17 16:59 ` Eric Sandeen
2013-12-11 1:21 ` [PATCH 29/74] libext2fs: fix memory leaks Darrick J. Wong
2013-12-12 18:17 ` Theodore Ts'o
2013-12-11 1:21 ` [PATCH 30/74] libext2fs: fix a broken close() test Darrick J. Wong
2013-12-12 18:18 ` Theodore Ts'o
2013-12-11 1:21 ` [PATCH 31/74] libext2fs: fail fileio write if we can't allocate a block Darrick J. Wong
2013-12-12 18:23 ` Theodore Ts'o
2013-12-11 1:21 ` [PATCH 32/74] libext2fs: fix punching extents when there are no left extents Darrick J. Wong
2013-12-12 18:25 ` Theodore Ts'o
2013-12-11 1:22 ` [PATCH 33/74] libext2fs: don't error out when punching a totally sparse file Darrick J. Wong
2013-12-12 18:26 ` Theodore Ts'o
2013-12-11 1:22 ` [PATCH 34/74] e2fsck: in rehash, mark newly allocated extent blocks as found Darrick J. Wong
2013-12-12 18:27 ` Theodore Ts'o
2013-12-11 1:22 ` [PATCH 35/74] libext2fs: zero block contents past EOF when setting size Darrick J. Wong
2013-12-12 18:40 ` Theodore Ts'o
2013-12-11 1:22 ` [PATCH 36/74] libext2fs: detect Darrick J. Wong
2013-12-13 4:39 ` Theodore Ts'o
2014-01-15 21:00 ` Darrick J. Wong
2013-12-11 1:22 ` [PATCH 37/74] libext2fs: don't always read backup group descriptors on a 1k-block meta_bg fs Darrick J. Wong
2014-01-11 18:59 ` Theodore Ts'o
2013-12-11 1:22 ` [PATCH 38/74] libext2fs: mark group data blocks when loading block bitmap Darrick J. Wong
2014-01-11 19:08 ` Theodore Ts'o
2013-12-11 1:22 ` [PATCH 39/74] e2fsck: remove uninit block bitmap calculation Darrick J. Wong
2014-01-11 19:08 ` Theodore Ts'o
2013-12-11 1:22 ` [PATCH 40/74] libext2fs: no need to clear BLOCK_UNINIT during ext2fs_reserve_super_and_bgd Darrick J. Wong
2014-01-10 8:17 ` Akira Fujita
2014-01-11 19:18 ` Theodore Ts'o
2013-12-11 1:22 ` [PATCH 41/74] tests: adjust test output to reflect block_uninit calculated block bitmaps Darrick J. Wong
2014-01-11 19:19 ` Theodore Ts'o
2013-12-11 1:22 ` [PATCH 42/74] libext2fs: only punch complete clusters Darrick J. Wong
2013-12-16 4:52 ` Theodore Ts'o
2013-12-11 1:23 ` [PATCH 43/74] libext2fs: don't update the summary counts when doing implied cluster allocation Darrick J. Wong
2013-12-16 4:53 ` Theodore Ts'o
2013-12-11 1:23 ` [PATCH 44/74] e2fsck: only release clusters when shortening a directory during a rehash Darrick J. Wong
2013-12-16 4:55 ` Theodore Ts'o
2013-12-11 1:23 ` [PATCH 45/74] e2fsck: print cluster ranges when encountering bitmap errors Darrick J. Wong
2013-12-16 4:55 ` Theodore Ts'o
2013-12-11 1:23 ` [PATCH 46/74] e2fsck: try implied cluster allocation when expanding a dir Darrick J. Wong
2013-12-16 4:56 ` Theodore Ts'o
2013-12-11 1:23 ` [PATCH 47/74] resize2fs: during shrink, don't free in-use bg data clusters Darrick J. Wong
2013-12-16 5:01 ` Theodore Ts'o
2013-12-16 20:10 ` Darrick J. Wong
2014-02-24 1:39 ` Theodore Ts'o
2013-12-11 1:23 ` [PATCH 48/74] resize2fs: don't free in-use clusters when moving blocks Darrick J. Wong
2014-02-24 1:56 ` Theodore Ts'o
2013-12-11 1:23 ` [PATCH 49/74] mke2fs: set block_validity as a default mount option Darrick J. Wong
2013-12-11 1:23 ` [PATCH 50/74] libext2fs: support allocating uninit blocks in bmap2() Darrick J. Wong
2014-01-11 22:57 ` Theodore Ts'o
2014-01-15 21:11 ` Darrick J. Wong
2014-01-15 22:19 ` Theodore Ts'o [this message]
2014-01-15 22:23 ` Theodore Ts'o
2013-12-11 1:23 ` [PATCH 51/74] libext2fs: file IO routines should handle uninit blocks Darrick J. Wong
2013-12-11 1:24 ` [PATCH 52/74] resize2fs: convert fs to and from 64bit mode Darrick J. Wong
2013-12-11 1:24 ` [PATCH 53/74] resize2fs: when toggling 64bit, don't free in-use bg data clusters Darrick J. Wong
2013-12-11 1:24 ` [PATCH 54/74] resize2fs: adjust reserved_gdt_blocks when changing group descriptor size Darrick J. Wong
2013-12-11 1:24 ` [PATCH 55/74] libext2fs: support modifying arbitrary extended attributes Darrick J. Wong
2014-02-24 4:09 ` Theodore Ts'o
2013-12-11 1:24 ` [PATCH 56/74] libext2fs: various tweaks to the xattr editor APIs Darrick J. Wong
2014-02-24 4:10 ` Theodore Ts'o
2013-12-11 1:24 ` [PATCH 57/74] libext2fs: extend xattr api to query number of attrs Darrick J. Wong
2014-02-24 4:10 ` Theodore Ts'o
2013-12-11 1:24 ` [PATCH 58/74] libext2fs: free key/value pairs before reading Darrick J. Wong
2014-02-24 4:10 ` Theodore Ts'o
2013-12-11 1:24 ` [PATCH 59/74] debugfs: dump all extended attributes Darrick J. Wong
2014-02-24 4:10 ` Theodore Ts'o
2013-12-11 1:24 ` [PATCH 60/74] libext2fs: ensure that inline data is always written to ibody Darrick J. Wong
2013-12-11 1:25 ` [PATCH 61/74] libext2fs: fix ext2fs_open2() truncation of the superblock parameter Darrick J. Wong
2013-12-11 1:25 ` [PATCH 62/74] misc: add fuse2fs, a FUSE server for e2fsprogs Darrick J. Wong
2013-12-11 1:25 ` [PATCH 63/74] fuse2fs: translate ACL structures Darrick J. Wong
2013-12-11 1:25 ` [PATCH 64/74] Subject: [PATCH] fuse2fs: support allocating uninit blocks in fallocate Darrick J. Wong
2013-12-11 1:25 ` [PATCH 65/74] fuse2fs: handle 64-bit dates correctly Darrick J. Wong
2013-12-11 1:25 ` [PATCH 67/74] tests: check correct handling of reading and writing uninit extents Darrick J. Wong
2013-12-11 1:25 ` [PATCH 68/74] tests: Add block_validity speed test Darrick J. Wong
2013-12-11 1:26 ` [PATCH 69/74] Subject: [PATCH] tests: test what happens if we run out of space Darrick J. Wong
2013-12-11 1:26 ` [PATCH 70/74] tests: add stale data after truncate test Darrick J. Wong
2013-12-11 1:26 ` [PATCH 71/74] tests: check mapping of really high logical block offsets Darrick J. Wong
2013-12-11 1:26 ` [PATCH 72/74] Subject: [PATCH] tests: enable using fuse2fs with metadata checksum test Darrick J. Wong
2013-12-11 1:26 ` [PATCH 73/74] tests: add large symlink test Darrick J. Wong
2013-12-11 1:26 ` [PATCH 74/74] tests: test date handling Darrick J. Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20140115221951.GA12226@thunk.org \
--to=tytso@mit.edu \
--cc=darrick.wong@oracle.com \
--cc=linux-ext4@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).