From: Greg KH <gregkh@suse.de>
To: linux-kernel@vger.kernel.org, stable@kernel.org,
torvalds@linux-foundation.org
Cc: Justin Forbes <jmforbes@linuxtx.org>,
Zwane Mwaikambo <zwane@arm.linux.org.uk>,
"Theodore Ts'o" <tytso@mit.edu>,
Randy Dunlap <rdunlap@xenotime.net>,
Dave Jones <davej@redhat.com>,
Chuck Wolber <chuckw@quantumlinux.com>,
Chris Wedgwood <reviews@ml.cw.f00f.org>,
Michael Krufky <mkrufky@linuxtv.org>,
Chuck Ebbert <cebbert@redhat.com>,
Domenico Andreoli <cavokz@gmail.com>,
akpm@linux-foundation.org, alan@lxorguk.ukuu.org.uk,
sandeen@redhat.com, hooanon05@yahoo.co.jp,
linux-ext4@vger.kernel.org, adilger@clusterfs.com
Subject: [27/50] ext34: ensure do_split leaves enough free space in both blocks
Date: Mon, 24 Sep 2007 09:21:31 -0700 [thread overview]
Message-ID: <20070924162131.GB13510@kroah.com> (raw)
In-Reply-To: <20070924161733.GA13510@kroah.com>
[-- Attachment #1: ext34-ensure-do_split-leaves-enough-free-space-in-both-blocks.patch --]
[-- Type: text/plain, Size: 8558 bytes --]
From: Eric Sandeen <sandeen@redhat.com>
commit ef2b02d3e617cb0400eedf2668f86215e1b0e6af in mainline.
The do_split() function for htree dir blocks is intended to split a leaf
block to make room for a new entry. It sorts the entries in the original
block by hash value, then moves the last half of the entries to the new
block - without accounting for how much space this actually moves. (IOW,
it moves half of the entry *count*, not half of the entry *space*). If by
chance we have both large & small entries, and we move only the smallest
entries, and we have a large new entry to insert, we may not have created
enough space for it.
The patch below stores each record size when calculating the dx_map, and
then walks the hash-sorted dx_map, calculating how many entries must be
moved to more evenly split the existing entries between the old block and
the new block, guaranteeing enough space for the new entry.
The dx_map "offs" member is reduced to u16 so that the overall map size
does not change - the map is temporarily stored at the end of the new block,
and if it grows too large it may be overwritten. By making offs and size both
u16, we won't grow the map size.
Also add a few comments to the functions involved.
This fixes the testcase reported by hooanon05@yahoo.co.jp on the
linux-ext4 list, "ext3 dir_index causes an error".
Thanks to Andreas Dilger for discussing the problem & solution with me.
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Andreas Dilger <adilger@clusterfs.com>
Tested-by: Junjiro Okajima <hooanon05@yahoo.co.jp>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: ext4 <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
fs/ext3/namei.c | 39 +++++++++++++++++++++++++++++++++++----
fs/ext4/namei.c | 39 +++++++++++++++++++++++++++++++++++----
2 files changed, 70 insertions(+), 8 deletions(-)
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -140,7 +140,8 @@ struct dx_frame
struct dx_map_entry
{
u32 hash;
- u32 offs;
+ u16 offs;
+ u16 size;
};
#ifdef CONFIG_EXT3_INDEX
@@ -671,6 +672,10 @@ errout:
* Directory block splitting, compacting
*/
+/*
+ * Create map of hash values, offsets, and sizes, stored at end of block.
+ * Returns number of entries mapped.
+ */
static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
{
@@ -684,7 +689,8 @@ static int dx_make_map (struct ext3_dir_
ext3fs_dirhash(de->name, de->name_len, &h);
map_tail--;
map_tail->hash = h.hash;
- map_tail->offs = (u32) ((char *) de - base);
+ map_tail->offs = (u16) ((char *) de - base);
+ map_tail->size = le16_to_cpu(de->rec_len);
count++;
cond_resched();
}
@@ -694,6 +700,7 @@ static int dx_make_map (struct ext3_dir_
return count;
}
+/* Sort map by hash value */
static void dx_sort_map (struct dx_map_entry *map, unsigned count)
{
struct dx_map_entry *p, *q, *top = map + count - 1;
@@ -1081,6 +1088,10 @@ static inline void ext3_set_de_type(stru
}
#ifdef CONFIG_EXT3_INDEX
+/*
+ * Move count entries from end of map between two memory locations.
+ * Returns pointer to last entry moved.
+ */
static struct ext3_dir_entry_2 *
dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
{
@@ -1099,6 +1110,10 @@ dx_move_dirents(char *from, char *to, st
return (struct ext3_dir_entry_2 *) (to - rec_len);
}
+/*
+ * Compact each dir entry in the range to the minimal rec_len.
+ * Returns pointer to last entry in range.
+ */
static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
{
struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base;
@@ -1121,6 +1136,11 @@ static struct ext3_dir_entry_2* dx_pack_
return prev;
}
+/*
+ * Split a full leaf block to make room for a new dir entry.
+ * Allocate a new block, and move entries so that they are approx. equally full.
+ * Returns pointer to de in block into which the new entry will be inserted.
+ */
static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
struct buffer_head **bh,struct dx_frame *frame,
struct dx_hash_info *hinfo, int *error)
@@ -1132,7 +1152,7 @@ static struct ext3_dir_entry_2 *do_split
u32 hash2;
struct dx_map_entry *map;
char *data1 = (*bh)->b_data, *data2;
- unsigned split;
+ unsigned split, move, size, i;
struct ext3_dir_entry_2 *de = NULL, *de2;
int err = 0;
@@ -1160,8 +1180,19 @@ static struct ext3_dir_entry_2 *do_split
count = dx_make_map ((struct ext3_dir_entry_2 *) data1,
blocksize, hinfo, map);
map -= count;
- split = count/2; // need to adjust to actual middle
dx_sort_map (map, count);
+ /* Split the existing block in the middle, size-wise */
+ size = 0;
+ move = 0;
+ for (i = count-1; i >= 0; i--) {
+ /* is more than half of this entry in 2nd half of the block? */
+ if (size + map[i].size/2 > blocksize/2)
+ break;
+ size += map[i].size;
+ move++;
+ }
+ /* map index at which we will split */
+ split = count - move;
hash2 = map[split].hash;
continued = hash2 == map[split - 1].hash;
dxtrace(printk("Split block %i at %x, %i/%i\n",
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -140,7 +140,8 @@ struct dx_frame
struct dx_map_entry
{
u32 hash;
- u32 offs;
+ u16 offs;
+ u16 size;
};
#ifdef CONFIG_EXT4_INDEX
@@ -671,6 +672,10 @@ errout:
* Directory block splitting, compacting
*/
+/*
+ * Create map of hash values, offsets, and sizes, stored at end of block.
+ * Returns number of entries mapped.
+ */
static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
{
@@ -684,7 +689,8 @@ static int dx_make_map (struct ext4_dir_
ext4fs_dirhash(de->name, de->name_len, &h);
map_tail--;
map_tail->hash = h.hash;
- map_tail->offs = (u32) ((char *) de - base);
+ map_tail->offs = (u16) ((char *) de - base);
+ map_tail->size = le16_to_cpu(de->rec_len);
count++;
cond_resched();
}
@@ -694,6 +700,7 @@ static int dx_make_map (struct ext4_dir_
return count;
}
+/* Sort map by hash value */
static void dx_sort_map (struct dx_map_entry *map, unsigned count)
{
struct dx_map_entry *p, *q, *top = map + count - 1;
@@ -1079,6 +1086,10 @@ static inline void ext4_set_de_type(stru
}
#ifdef CONFIG_EXT4_INDEX
+/*
+ * Move count entries from end of map between two memory locations.
+ * Returns pointer to last entry moved.
+ */
static struct ext4_dir_entry_2 *
dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
{
@@ -1097,6 +1108,10 @@ dx_move_dirents(char *from, char *to, st
return (struct ext4_dir_entry_2 *) (to - rec_len);
}
+/*
+ * Compact each dir entry in the range to the minimal rec_len.
+ * Returns pointer to last entry in range.
+ */
static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size)
{
struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base;
@@ -1119,6 +1134,11 @@ static struct ext4_dir_entry_2* dx_pack_
return prev;
}
+/*
+ * Split a full leaf block to make room for a new dir entry.
+ * Allocate a new block, and move entries so that they are approx. equally full.
+ * Returns pointer to de in block into which the new entry will be inserted.
+ */
static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
struct buffer_head **bh,struct dx_frame *frame,
struct dx_hash_info *hinfo, int *error)
@@ -1130,7 +1150,7 @@ static struct ext4_dir_entry_2 *do_split
u32 hash2;
struct dx_map_entry *map;
char *data1 = (*bh)->b_data, *data2;
- unsigned split;
+ unsigned split, move, size, i;
struct ext4_dir_entry_2 *de = NULL, *de2;
int err = 0;
@@ -1158,8 +1178,19 @@ static struct ext4_dir_entry_2 *do_split
count = dx_make_map ((struct ext4_dir_entry_2 *) data1,
blocksize, hinfo, map);
map -= count;
- split = count/2; // need to adjust to actual middle
dx_sort_map (map, count);
+ /* Split the existing block in the middle, size-wise */
+ size = 0;
+ move = 0;
+ for (i = count-1; i >= 0; i--) {
+ /* is more than half of this entry in 2nd half of the block? */
+ if (size + map[i].size/2 > blocksize/2)
+ break;
+ size += map[i].size;
+ move++;
+ }
+ /* map index at which we will split */
+ split = count - move;
hash2 = map[split].hash;
continued = hash2 == map[split - 1].hash;
dxtrace(printk("Split block %i at %x, %i/%i\n",
--
next prev parent reply other threads:[~2007-09-24 16:34 UTC|newest]
Thread overview: 59+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20070924161246.983665021@mini.kroah.org>
2007-09-24 16:17 ` [00/50] 2.6.22-stable review Greg KH
2007-09-24 16:19 ` [01/50] V4L: ivtv: fix VIDIOC_S_FBUF: new OSD values were never set Greg KH
2007-09-24 16:19 ` [02/50] DVB: get_dvb_firmware: update script for new location of sp8870 firmware Greg KH
2007-09-24 16:19 ` [03/50] DVB: get_dvb_firmware: update script for new location of tda10046 firmware Greg KH
2007-09-24 16:19 ` [04/50] DVB: b2c2-flexcop: fix Airstar HD5000 tuning regression Greg KH
2007-09-24 16:20 ` [05/50] setpgid(child) fails if the child was forked by sub-thread Greg KH
2007-09-24 16:20 ` [06/50] sigqueue_free: fix the race with collect_signal() Greg KH
2007-09-24 16:20 ` [07/50] kconfig: oldconfig shall not set symbols if it does not need to Greg KH
2007-09-24 16:20 ` [08/50] MTD: Makefile fix for mtdsuper Greg KH
2007-09-24 16:20 ` [09/50] USB: fix linked list insertion bugfix for usb core Greg KH
2007-09-24 16:20 ` [10/50] ACPI: Validate XSDT, use RSDT if XSDT fails Greg KH
2007-09-24 16:20 ` [11/50] POWERPC: Flush registers to proper task context Greg KH
2007-09-24 16:20 ` [12/50] 3w-9xxx: Fix dma mask setting Greg KH
2007-09-24 16:20 ` [13/50] MTD: Initialise s_flags in get_sb_mtd_aux() Greg KH
2007-09-24 16:20 ` [14/50] JFFS2: fix write deadlock regression Greg KH
2007-09-24 16:20 ` [15/50] V4L: cx88: Avoid a NULL pointer dereference during mpeg_open() Greg KH
2007-09-24 16:20 ` [16/50] hwmon: End of I/O region off-by-one Greg KH
2007-09-24 16:20 ` [17/50] Fix debug regression in video/pwc Greg KH
2007-09-24 16:20 ` [18/50] splice: fix direct splice error handling Greg KH
2007-09-24 16:21 ` [19/50] rpc: fix garbage in printk in svc_tcp_accept() Greg KH
2007-09-24 16:21 ` [20/50] disable sys_timerfd() Greg KH
2007-09-24 16:21 ` [21/50] afs: mntput called before dput Greg KH
2007-09-24 16:21 ` [22/50] Fix DAC960 driver on machines which dont support 64-bit DMA Greg KH
2007-09-24 16:21 ` [23/50] Fix "Fix DAC960 driver on machines which dont support 64-bit DMA" Greg KH
2007-09-24 16:21 ` [24/50] firewire: fw-ohci: ignore failure of pci_set_power_state (fix suspend regression) Greg KH
2007-09-24 16:21 ` [25/50] futex_compat: fix list traversal bugs Greg KH
2007-09-24 16:21 ` [26/50] Leases can be hidden by flocks Greg KH
2007-09-24 16:21 ` Greg KH [this message]
2007-09-24 16:21 ` [28/50] nfs: fix oops re sysctls and V4 support Greg KH
2007-09-24 16:21 ` [29/50] dir_index: error out instead of BUG on corrupt dx dirs Greg KH
2007-09-24 16:21 ` [30/50] ieee1394: ohci1394: fix initialization if built non-modular Greg KH
2007-09-24 16:21 ` [31/50] Correctly close old nfsd/lockd sockets Greg KH
2007-09-24 16:21 ` [32/50] Fix race with shared tag queue maps Greg KH
2007-09-24 16:21 ` [33/50] crypto: blkcipher_get_spot() handling of buffer at end of page Greg KH
2007-09-24 16:21 ` [34/50] fix realtek phy id in forcedeth Greg KH
2007-09-24 16:21 ` [35/50] Fix decnet device address listing Greg KH
2007-09-24 16:22 ` [36/50] Fix device address listing for ipv4 Greg KH
2007-09-24 16:22 ` [37/50] Fix inet_diag OOPS Greg KH
2007-09-24 22:03 ` Dan Merillat
2007-09-25 4:03 ` Patrick McHardy
2007-09-24 16:22 ` [38/50] Fix IPV6 append OOPS Greg KH
2007-09-24 16:22 ` [39/50] Fix IPSEC AH4 options handling Greg KH
2007-09-24 16:22 ` [40/50] Fix ipv6 double-sock-release with MSG_CONFIRM Greg KH
2007-09-24 16:22 ` [41/50] : Fix IPV6 DAD handling Greg KH
2007-09-24 16:22 ` [42/50] Fix ipv6 source address handling Greg KH
2007-09-24 22:05 ` roel
2007-09-24 16:22 ` [43/50] Fix oops in vlan and bridging code Greg KH
2007-09-24 16:22 ` [44/50] Fix tc_ematch kbuild Greg KH
2007-09-24 16:22 ` [45/50] Handle snd_una in tcp_cwnd_down() Greg KH
2007-09-24 16:22 ` [46/50] Fix TCP DSACK cwnd handling Greg KH
2007-09-24 16:22 ` [47/50] Fix datagram recvmsg NULL iov handling regression Greg KH
2007-09-24 16:22 ` [48/50] Fix pktgen src_mac handling Greg KH
2007-09-24 16:22 ` [49/50] Fix sparc64 v100 platform booting Greg KH
2007-09-24 16:22 ` [50/50] bcm43xx: Fix cancellation of work queue crashes Greg KH
2007-09-24 16:31 ` [00/50] 2.6.22-stable review Greg KH
2007-09-24 16:44 ` Chris Wedgwood
2007-09-24 16:46 ` Chris Wedgwood
2007-09-24 17:14 ` Greg KH
2007-09-24 17:13 ` Greg KH
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070924162131.GB13510@kroah.com \
--to=gregkh@suse.de \
--cc=adilger@clusterfs.com \
--cc=akpm@linux-foundation.org \
--cc=alan@lxorguk.ukuu.org.uk \
--cc=cavokz@gmail.com \
--cc=cebbert@redhat.com \
--cc=chuckw@quantumlinux.com \
--cc=davej@redhat.com \
--cc=hooanon05@yahoo.co.jp \
--cc=jmforbes@linuxtx.org \
--cc=linux-ext4@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mkrufky@linuxtv.org \
--cc=rdunlap@xenotime.net \
--cc=reviews@ml.cw.f00f.org \
--cc=sandeen@redhat.com \
--cc=stable@kernel.org \
--cc=torvalds@linux-foundation.org \
--cc=tytso@mit.edu \
--cc=zwane@arm.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox