From: Theodore Ts'o <tytso@mit.edu>
To: "Darrick J. Wong" <darrick.wong@oracle.com>
Cc: linux-ext4@vger.kernel.org
Subject: Re: [PATCH 50/74] libext2fs: support allocating uninit blocks in bmap2()
Date: Wed, 15 Jan 2014 17:19:51 -0500 [thread overview]
Message-ID: <20140115221951.GA12226@thunk.org> (raw)
In-Reply-To: <20140115211122.GJ9229@birch.djwong.org>
On Wed, Jan 15, 2014 at 01:11:22PM -0800, Darrick J. Wong wrote:
> > The other thing to note about this patch is that if you want to
> > implement fallocate, ext2fs_bmap2() is really the wrong tool to use.
> > I've been working on a program for work which pre-creates a bunch of
>
> I think that ext2fs_fallocate would be a good addition to the library. Is your
> program far enough along to share? fuse2fs would benefit greatly.
An ext2fs_fallocate() is way more difficult than what I've done, since
you have to deal with all sorts of corner cases where the file has
pre-existing sparse extents, which may or may not be initialized, and
making sure that it works in that case. Allocating blocks to a file
which you know started as a zero length file is in fact much easier.
Here are the key bits from my program:
/*
 * Mark (inuse > 0) or release (inuse < 0) the run of `num` blocks that
 * starts at `blk`: update the block bitmap, the free-block count of
 * every block group the run touches, and the superblock free-block
 * count.  inuse == 0 is a no-op.
 *
 * This should eventually be cleaned up and put into libext2fs.
 * It is much faster than calling ext2fs_block_alloc_stats() once per
 * block, because that recalculates the bg descriptor checksum for
 * every single block that you allocate; here the checksum is set once
 * per group touched by the range.
 *
 * Unless OMIT_COM_ERR is defined, a range that extends past the end of
 * the filesystem is reported through com_err() and ignored.
 */
static void ext2fs_block_alloc_stats_range(ext2_filsys fs, blk64_t blk,
					   blk_t num, int inuse)
{
#ifndef OMIT_COM_ERR
	/*
	 * The range is [blk, blk + num), so it may end exactly at the
	 * block count; only a range reaching beyond it is illegal.
	 */
	if (blk + num > ext2fs_blocks_count(fs->super)) {
		com_err("ext2fs_block_alloc_stats_range", 0,
			"Illegal block range: %llu (%u) ",
			(unsigned long long) blk, num);
		return;
	}
#endif
	if (inuse == 0)
		return;
	/* Normalise inuse to +1 / -1 so it can be used as a sign below. */
	if (inuse > 0) {
		ext2fs_mark_block_bitmap_range2(fs->block_map, blk, num);
		inuse = 1;
	} else {
		ext2fs_unmark_block_bitmap_range2(fs->block_map, blk, num);
		inuse = -1;
	}
	/* An empty range changes no counters; leave the dirty flags alone. */
	if (num == 0)
		return;

	/* Walk the range one block group at a time. */
	while (num) {
		int group = ext2fs_group_of_blk2(fs, blk);
		blk64_t last_blk = ext2fs_group_last_block2(fs, group);
		blk_t n = num;
		long long delta;

		/* Clamp this step to the part of the range inside `group`. */
		if (blk + num > last_blk)
			n = last_blk - blk + 1;

		/*
		 * Compute the signed block delta in 64 bits.  Multiplying
		 * the int `inuse` directly by the unsigned `n` would be
		 * done in unsigned arithmetic and turn a negative delta
		 * into a huge positive one.
		 */
		delta = (long long) inuse * (long long) n;

		ext2fs_bg_free_blocks_count_set(fs, group,
			ext2fs_bg_free_blocks_count(fs, group) -
			delta / EXT2FS_CLUSTER_RATIO(fs));
		ext2fs_bg_flags_clear(fs, group, EXT2_BG_BLOCK_UNINIT);
		ext2fs_group_desc_csum_set(fs, group);
		ext2fs_free_blocks_count_add(fs->super, -delta);
		blk += n;
		num -= n;
	}
	ext2fs_mark_super_dirty(fs);
	ext2fs_mark_bb_dirty(fs);
}
/*
 * ext2fs_allocate_tables() does not allocate blocks optimally in every
 * situation, and that needs a closer look at some point.  Until then
 * this simple and stupid replacement is used instead.
 *
 * Three passes are made over all block groups: the first assigns each
 * group its block bitmap block, the second its inode bitmap block, and
 * the third a run of fs->inode_blocks_per_group blocks for its inode
 * table.  Every search starts at `goal` (declared outside this
 * function) and stores the block it found back into `goal`.
 *
 * Returns 0 on success, or the first error reported by the allocator.
 */
errcode_t my_allocate_tables(ext2_filsys fs)
{
	dgrp_t grp;
	errcode_t err;

	/* Pass 1: block bitmaps. */
	grp = 0;
	while (grp < fs->group_desc_count) {
		err = ext2fs_new_block2(fs, goal, NULL, &goal);
		if (err != 0)
			return err;
		ext2fs_block_alloc_stats2(fs, goal, +1);
		ext2fs_block_bitmap_loc_set(fs, grp, goal);
		grp++;
	}

	/* Pass 2: inode bitmaps. */
	grp = 0;
	while (grp < fs->group_desc_count) {
		err = ext2fs_new_block2(fs, goal, NULL, &goal);
		if (err != 0)
			return err;
		ext2fs_block_alloc_stats2(fs, goal, +1);
		ext2fs_inode_bitmap_loc_set(fs, grp, goal);
		grp++;
	}

	/* Pass 3: inode tables, searched for up to the last block. */
	grp = 0;
	while (grp < fs->group_desc_count) {
		err = ext2fs_get_free_blocks2(fs, goal,
					      ext2fs_blocks_count(fs->super) - 1,
					      fs->inode_blocks_per_group,
					      fs->block_map, &goal);
		if (err != 0)
			return err;
		ext2fs_block_alloc_stats_range(fs, goal,
					       fs->inode_blocks_per_group, +1);
		ext2fs_inode_table_loc_set(fs, grp, goal);
		grp++;
	}

	return 0;
}
/*
 * Debugging aid: read inode `ino` with ext2fs_read_inode() and print its
 * i_size_high and its full size, prefixed by `tag`.  Nothing is printed
 * if the inode cannot be read.
 */
static void hugefile_print_size(ext2_filsys fs, ext2_ino_t ino,
				const char *tag)
{
	struct ext2_inode t;

	if (ext2fs_read_inode(fs, ino, &t))
		return;
	printf("%si_size_high: %u size: %llu\n", tag,
	       (unsigned int) t.i_size_high,
	       (unsigned long long) EXT2_I_SIZE(&t));
}

/*
 * Create a regular file (mode 0600) of `num` blocks, named
 * "hugefile<idx>" with idx zero-padded to five digits, in directory
 * `dir`.  The new inode number is stored in *ino.
 *
 * Blocks are taken from runs of free blocks in fs->block_map, searching
 * forward from `goal` (declared outside this function; it is advanced
 * past every run that is used).  Each run is recorded in the inode's
 * extent tree in pieces of at most EXT_INIT_MAX_LEN blocks.
 *
 * Returns 0 on success.  ENOSPC is returned when no free block can be
 * found; any other failure returns the error of the call that failed.
 * On failure the inode and any blocks already allocated are not
 * released.
 *
 * Some of this could eventually get turned into fallocate, but that's
 * actually a much more difficult and tricky thing to implement.
 */
static errcode_t mk_hugefile(ext2_filsys fs, unsigned int num,
			     ext2_ino_t dir, int idx, ext2_ino_t *ino)
{
	errcode_t retval;
	blk64_t lblk = 0, bend;
	__u64 size;
	struct ext2_inode inode;
	ext2_extent_handle_t handle = NULL;
	char fn[32];

	retval = ext2fs_new_inode(fs, 0, LINUX_S_IFREG, NULL, ino);
	if (retval)
		return retval;

	memset(&inode, 0, sizeof(struct ext2_inode));
	inode.i_mode = LINUX_S_IFREG | 0600;
	ext2fs_iblk_set(fs, &inode, num / EXT2FS_CLUSTER_RATIO(fs));
	size = (__u64) num * fs->blocksize;
	inode.i_size = size & 0xffffffff;
	inode.i_size_high = (size >> 32);
	inode.i_links_count = 1;

	retval = ext2fs_write_new_inode(fs, *ino, &inode);
	if (retval)
		return retval;

	ext2fs_inode_alloc_stats2(fs, *ino, +1, 0);

	retval = ext2fs_extent_open2(fs, *ino, &inode, &handle);
	if (retval)
		return retval;

	hugefile_print_size(fs, *ino, "eo: ");

	while (num) {
		blk64_t pblk, end;
		blk_t n = num;

		/* Start of the next free run at or after goal. */
		retval = ext2fs_find_first_zero_block_bitmap2(fs->block_map,
			goal, ext2fs_blocks_count(fs->super) - 1, &end);
		if (retval) {
			retval = ENOSPC;
			goto errout;
		}
		goal = end;

		/*
		 * End of that run: the first in-use block after it.
		 * ENOENT means the run extends to the end of the
		 * filesystem.
		 */
		retval = ext2fs_find_first_set_block_bitmap2(fs->block_map,
			goal, ext2fs_blocks_count(fs->super) - 1, &bend);
		if (retval == ENOENT) {
			bend = ext2fs_blocks_count(fs->super);
			retval = 0;
		}
		if (retval)
			goto errout;

		if (bend - goal < num)
			n = bend - goal;
		printf("goal %llu bend %llu num %u n %u\n",
		       (unsigned long long) goal, (unsigned long long) bend,
		       num, n);
		pblk = goal;
		num -= n;
		goal += n;
		ext2fs_block_alloc_stats_range(fs, pblk, n, +1);

		/* Record the run, one maximum-length extent at a time. */
		while (n) {
			blk_t l = n;
			struct ext2fs_extent newextent;

			hugefile_print_size(fs, *ino, "");

			if (l > EXT_INIT_MAX_LEN)
				l = EXT_INIT_MAX_LEN;
			newextent.e_len = l;
			newextent.e_pblk = pblk;
			newextent.e_lblk = lblk;
			newextent.e_flags = 0;
			printf("inserting extent: %llu %llu %u\n",
			       (unsigned long long) lblk,
			       (unsigned long long) pblk, l);
			retval = ext2fs_extent_insert(handle,
					EXT2_EXTENT_INSERT_AFTER, &newextent);
			if (retval)
				goto errout;
			pblk += l;
			lblk += l;
			n -= l;
		}
	}

	hugefile_print_size(fs, *ino, "");

	snprintf(fn, sizeof(fn), "hugefile%05d", idx);

	/* Link the file in, growing the directory as often as needed. */
	for (;;) {
		retval = ext2fs_link(fs, dir, fn, *ino, EXT2_FT_REG_FILE);
		if (retval != EXT2_ET_DIR_NO_SPACE)
			break;
		retval = ext2fs_expand_dir(fs, dir);
		if (retval)
			break;
	}

errout:
	if (handle)
		ext2fs_extent_free(handle);
	return retval;
}
Note that this requires some of the test patches I've been sending
out, since it uses ext2fs_find_first_{set,zero}_block_bitmap2().
There are also some bugs in the versions which I sent out; I'm working
on fixing them....
> That said, I've also found a couple of bugs in the extent code by implementing
> fallocate in such a stupid way. :) It turns out that if (a) we need to split
> an extent into three pieces (say we write to a block in the middle of an
> unwritten extent and don't want to convert the whole extent) and (b) either of
> the extent_insert calls requires us to split the extent block and (c) we ENOSPC
> while trying to allocate a new extent block, we don't put the extent tree back
> the way it was before the split, and all the blocks after that point are lost.
Well, I found a bug in ext2fs_extent_insert() which showed up when I
tried to implement the block allocation in an intelligent way. :-)
I'll send out that bug fix in a bit.
> I will send patches to avoid this corruption by checking for enough space soon.
> I think your local git tree has patches in it that aren't on kernel.org yet, so
> I'll hold off until I see them show up.
Yeah, some of those patches still need some clean up, so I haven't
pushed my maint branch to kernel.org yet.
But anyway, the above code will give you an idea where I'm going ---
this is **way** faster than trying to allocate blocks using the
set_bmap() function. :-)
- Ted
next prev parent reply other threads:[~2014-01-15 22:19 UTC|newest]
Thread overview: 150+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-12-11 1:18 [PATCH v3 00/74] e2fsprogs patchbomb 12/2013 Darrick J. Wong
2013-12-11 1:18 ` [PATCH 01/74] libext2fs: don't overflow when punching indirect blocks with large blocks Darrick J. Wong
2013-12-12 17:06 ` Theodore Ts'o
2013-12-11 1:18 ` [PATCH 02/74] libext2fs: fix tests that set LARGE_FILE Darrick J. Wong
2013-12-12 17:09 ` Theodore Ts'o
2013-12-11 1:18 ` [PATCH 03/74] mke2fs: load configfile blocksize setting before 64bit checks Darrick J. Wong
2013-12-12 17:27 ` Theodore Ts'o
2013-12-12 22:28 ` Andreas Dilger
2013-12-12 23:13 ` Darrick J. Wong
2013-12-12 23:14 ` [PATCH] mke2fs: clean up kernel version tests Darrick J. Wong
2013-12-11 1:18 ` [PATCH 04/74] libext2fs: use ext2fs_punch() to truncate quota file Darrick J. Wong
2013-12-12 17:28 ` Theodore Ts'o
2013-12-12 17:36 ` Theodore Ts'o
2013-12-12 20:07 ` Darrick J. Wong
2013-12-12 20:56 ` Theodore Ts'o
2013-12-12 21:10 ` Darrick J. Wong
2013-12-11 1:18 ` [PATCH 05/74] debugfs: fix init_filesys help text Darrick J. Wong
2013-12-12 17:37 ` Theodore Ts'o
2013-12-11 1:18 ` [PATCH 06/74] tune2fs: forbid changing uuid on an uninit_bg filesystem Darrick J. Wong
2013-12-15 2:02 ` Theodore Ts'o
2013-12-11 1:19 ` [PATCH 07/74] libext2fs: tweak inline data error wording Darrick J. Wong
2013-12-13 4:33 ` Theodore Ts'o
2013-12-11 1:19 ` [PATCH 08/74] libext2fs: don't allow ridiculously large logical block numbers Darrick J. Wong
2013-12-12 17:41 ` Theodore Ts'o
2013-12-11 1:19 ` [PATCH 09/74] libext2fs: fix another minor grammatical error in the error catalog Darrick J. Wong
2013-12-12 17:42 ` Theodore Ts'o
2013-12-11 1:19 ` [PATCH 10/74] debugfs: fix various minor bogosity Darrick J. Wong
2013-12-12 17:44 ` Theodore Ts'o
2013-12-11 1:19 ` [PATCH 11/74] misc: use the checksum predicate function, not raw flag tests Darrick J. Wong
2013-12-13 4:34 ` Theodore Ts'o
2013-12-11 1:19 ` [PATCH 12/74] libext2fs: make symlinks safe for 64bit blocks and extents Darrick J. Wong
2013-12-12 17:48 ` Theodore Ts'o
2013-12-11 1:19 ` [PATCH 13/74] debugfs: handle 64bit block numbers Darrick J. Wong
2013-12-12 17:49 ` Theodore Ts'o
2013-12-17 17:01 ` Eric Sandeen
2013-12-11 1:19 ` [PATCH 14/74] libext2fs: fileio should use 64bit io routines Darrick J. Wong
2013-12-12 17:50 ` Theodore Ts'o
2013-12-11 1:20 ` [PATCH 15/74] resize2fs: rewrite extent/dir/ea block checksums when migrating Darrick J. Wong
2013-12-13 4:35 ` Theodore Ts'o
2013-12-11 1:20 ` [PATCH 16/74] debugfs: don't leak fd when calling dump_file Darrick J. Wong
2013-12-12 17:51 ` Theodore Ts'o
2013-12-11 1:20 ` [PATCH 17/74] debugfs: don't leak mmp_s memory Darrick J. Wong
2013-12-12 17:52 ` Theodore Ts'o
2013-12-12 22:33 ` Andreas Dilger
2013-12-12 22:44 ` Darrick J. Wong
2013-12-11 1:20 ` [PATCH 18/74] e2fsck: fix memory leaks Darrick J. Wong
2013-12-12 17:58 ` Theodore Ts'o
2013-12-17 16:12 ` Eric Sandeen
2013-12-11 1:20 ` [PATCH 19/74] misc: don't leak file descriptors Darrick J. Wong
2013-12-12 18:06 ` Theodore Ts'o
2013-12-11 1:20 ` [PATCH 20/74] mke2fs: don't leak memory Darrick J. Wong
2013-12-12 18:07 ` Theodore Ts'o
2013-12-11 1:20 ` [PATCH 21/74] e4defrag: don't crash if umounts the filesystem races with us Darrick J. Wong
2013-12-12 18:08 ` Theodore Ts'o
2013-12-11 1:20 ` [PATCH 22/74] e4defrag: defensively check results of sysconf(_SC_PAGESIZE) Darrick J. Wong
2013-12-12 18:09 ` Theodore Ts'o
2013-12-11 1:20 ` [PATCH 23/74] e2image: check return value from check_if_mounted Darrick J. Wong
2013-12-12 18:09 ` Theodore Ts'o
2013-12-11 1:20 ` [PATCH 24/74] dumpe2fs: check return values Darrick J. Wong
2013-12-12 18:10 ` Theodore Ts'o
2013-12-11 1:21 ` [PATCH 25/74] libss: fix fd error handling Darrick J. Wong
2013-12-12 18:11 ` Theodore Ts'o
2013-12-11 1:21 ` [PATCH 26/74] libss: fix memory handling errors Darrick J. Wong
2013-12-12 18:13 ` Theodore Ts'o
2013-12-17 17:04 ` Eric Sandeen
2013-12-18 22:23 ` Darrick J. Wong
2013-12-11 1:21 ` [PATCH 27/74] libquota: fix memory leak Darrick J. Wong
2013-12-12 18:14 ` Theodore Ts'o
2013-12-11 1:21 ` [PATCH 28/74] libext2fs: check return values Darrick J. Wong
2013-12-12 18:15 ` Theodore Ts'o
2013-12-17 16:57 ` Eric Sandeen
2013-12-17 16:59 ` Eric Sandeen
2013-12-11 1:21 ` [PATCH 29/74] libext2fs: fix memory leaks Darrick J. Wong
2013-12-12 18:17 ` Theodore Ts'o
2013-12-11 1:21 ` [PATCH 30/74] libext2fs: fix a broken close() test Darrick J. Wong
2013-12-12 18:18 ` Theodore Ts'o
2013-12-11 1:21 ` [PATCH 31/74] libext2fs: fail fileio write if we can't allocate a block Darrick J. Wong
2013-12-12 18:23 ` Theodore Ts'o
2013-12-11 1:21 ` [PATCH 32/74] libext2fs: fix punching extents when there are no left extents Darrick J. Wong
2013-12-12 18:25 ` Theodore Ts'o
2013-12-11 1:22 ` [PATCH 33/74] libext2fs: don't error out when punching a totally sparse file Darrick J. Wong
2013-12-12 18:26 ` Theodore Ts'o
2013-12-11 1:22 ` [PATCH 34/74] e2fsck: in rehash, mark newly allocated extent blocks as found Darrick J. Wong
2013-12-12 18:27 ` Theodore Ts'o
2013-12-11 1:22 ` [PATCH 35/74] libext2fs: zero block contents past EOF when setting size Darrick J. Wong
2013-12-12 18:40 ` Theodore Ts'o
2013-12-11 1:22 ` [PATCH 36/74] libext2fs: detect Darrick J. Wong
2013-12-13 4:39 ` Theodore Ts'o
2014-01-15 21:00 ` Darrick J. Wong
2013-12-11 1:22 ` [PATCH 37/74] libext2fs: don't always read backup group descriptors on a 1k-block meta_bg fs Darrick J. Wong
2014-01-11 18:59 ` Theodore Ts'o
2013-12-11 1:22 ` [PATCH 38/74] libext2fs: mark group data blocks when loading block bitmap Darrick J. Wong
2014-01-11 19:08 ` Theodore Ts'o
2013-12-11 1:22 ` [PATCH 39/74] e2fsck: remove uninit block bitmap calculation Darrick J. Wong
2014-01-11 19:08 ` Theodore Ts'o
2013-12-11 1:22 ` [PATCH 40/74] libext2fs: no need to clear BLOCK_UNINIT during ext2fs_reserve_super_and_bgd Darrick J. Wong
2014-01-10 8:17 ` Akira Fujita
2014-01-11 19:18 ` Theodore Ts'o
2013-12-11 1:22 ` [PATCH 41/74] tests: adjust test output to reflect block_uninit calculated block bitmaps Darrick J. Wong
2014-01-11 19:19 ` Theodore Ts'o
2013-12-11 1:22 ` [PATCH 42/74] libext2fs: only punch complete clusters Darrick J. Wong
2013-12-16 4:52 ` Theodore Ts'o
2013-12-11 1:23 ` [PATCH 43/74] libext2fs: don't update the summary counts when doing implied cluster allocation Darrick J. Wong
2013-12-16 4:53 ` Theodore Ts'o
2013-12-11 1:23 ` [PATCH 44/74] e2fsck: only release clusters when shortening a directory during a rehash Darrick J. Wong
2013-12-16 4:55 ` Theodore Ts'o
2013-12-11 1:23 ` [PATCH 45/74] e2fsck: print cluster ranges when encountering bitmap errors Darrick J. Wong
2013-12-16 4:55 ` Theodore Ts'o
2013-12-11 1:23 ` [PATCH 46/74] e2fsck: try implied cluster allocation when expanding a dir Darrick J. Wong
2013-12-16 4:56 ` Theodore Ts'o
2013-12-11 1:23 ` [PATCH 47/74] resize2fs: during shrink, don't free in-use bg data clusters Darrick J. Wong
2013-12-16 5:01 ` Theodore Ts'o
2013-12-16 20:10 ` Darrick J. Wong
2014-02-24 1:39 ` Theodore Ts'o
2013-12-11 1:23 ` [PATCH 48/74] resize2fs: don't free in-use clusters when moving blocks Darrick J. Wong
2014-02-24 1:56 ` Theodore Ts'o
2013-12-11 1:23 ` [PATCH 49/74] mke2fs: set block_validity as a default mount option Darrick J. Wong
2013-12-11 1:23 ` [PATCH 50/74] libext2fs: support allocating uninit blocks in bmap2() Darrick J. Wong
2014-01-11 22:57 ` Theodore Ts'o
2014-01-15 21:11 ` Darrick J. Wong
2014-01-15 22:19 ` Theodore Ts'o [this message]
2014-01-15 22:23 ` Theodore Ts'o
2013-12-11 1:23 ` [PATCH 51/74] libext2fs: file IO routines should handle uninit blocks Darrick J. Wong
2013-12-11 1:24 ` [PATCH 52/74] resize2fs: convert fs to and from 64bit mode Darrick J. Wong
2013-12-11 1:24 ` [PATCH 53/74] resize2fs: when toggling 64bit, don't free in-use bg data clusters Darrick J. Wong
2013-12-11 1:24 ` [PATCH 54/74] resize2fs: adjust reserved_gdt_blocks when changing group descriptor size Darrick J. Wong
2013-12-11 1:24 ` [PATCH 55/74] libext2fs: support modifying arbitrary extended attributes Darrick J. Wong
2014-02-24 4:09 ` Theodore Ts'o
2013-12-11 1:24 ` [PATCH 56/74] libext2fs: various tweaks to the xattr editor APIs Darrick J. Wong
2014-02-24 4:10 ` Theodore Ts'o
2013-12-11 1:24 ` [PATCH 57/74] libext2fs: extend xattr api to query number of attrs Darrick J. Wong
2014-02-24 4:10 ` Theodore Ts'o
2013-12-11 1:24 ` [PATCH 58/74] libext2fs: free key/value pairs before reading Darrick J. Wong
2014-02-24 4:10 ` Theodore Ts'o
2013-12-11 1:24 ` [PATCH 59/74] debugfs: dump all extended attributes Darrick J. Wong
2014-02-24 4:10 ` Theodore Ts'o
2013-12-11 1:24 ` [PATCH 60/74] libext2fs: ensure that inline data is always written to ibody Darrick J. Wong
2013-12-11 1:25 ` [PATCH 61/74] libext2fs: fix ext2fs_open2() truncation of the superblock parameter Darrick J. Wong
2013-12-11 1:25 ` [PATCH 62/74] misc: add fuse2fs, a FUSE server for e2fsprogs Darrick J. Wong
2013-12-11 1:25 ` [PATCH 63/74] fuse2fs: translate ACL structures Darrick J. Wong
2013-12-11 1:25 ` [PATCH 64/74] Subject: [PATCH] fuse2fs: support allocating uninit blocks in fallocate Darrick J. Wong
2013-12-11 1:25 ` [PATCH 65/74] fuse2fs: handle 64-bit dates correctly Darrick J. Wong
2013-12-11 1:25 ` [PATCH 67/74] tests: check correct handling of reading and writing uninit extents Darrick J. Wong
2013-12-11 1:25 ` [PATCH 68/74] tests: Add block_validity speed test Darrick J. Wong
2013-12-11 1:26 ` [PATCH 69/74] Subject: [PATCH] tests: test what happens if we run out of space Darrick J. Wong
2013-12-11 1:26 ` [PATCH 70/74] tests: add stale data after truncate test Darrick J. Wong
2013-12-11 1:26 ` [PATCH 71/74] tests: check mapping of really high logical block offsets Darrick J. Wong
2013-12-11 1:26 ` [PATCH 72/74] Subject: [PATCH] tests: enable using fuse2fs with metadata checksum test Darrick J. Wong
2013-12-11 1:26 ` [PATCH 73/74] tests: add large symlink test Darrick J. Wong
2013-12-11 1:26 ` [PATCH 74/74] tests: test date handling Darrick J. Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20140115221951.GA12226@thunk.org \
--to=tytso@mit.edu \
--cc=darrick.wong@oracle.com \
--cc=linux-ext4@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.