* [PATCH] uninitialized block groups
@ 2007-08-23 5:41 Avantika Mathur
[not found] ` <20070828201840.GX5377@schatzie.adilger.int>
0 siblings, 1 reply; 3+ messages in thread
From: Avantika Mathur @ 2007-08-23 5:41 UTC (permalink / raw)
To: Andreas Dilger, linux-ext4
[-- Attachment #1: Type: text/plain, Size: 405 bytes --]
Below is an updated version of the uninitialized block groups patch,
ported to the end of the patch queue. There are small changes to the
previous version:
- consistent format of the group_desc structure with the e2fsprogs patches
- do not verify group_desc checksums if the feature is not enabled
I have tested the patch with the e2fsprogs patches sent by Kalpak, and
ran fsx.
thanks
Avantika
---
[-- Attachment #2: uninit --]
[-- Type: text/plain, Size: 20486 bytes --]
From: Andreas Dilger <adilger@clusterfs.com>
Signed-off-by: Avantika Mathur <mathur@us.ibm.com>
---
Index: linux-2.6.23-rc3/fs/ext4/balloc.c
===================================================================
--- linux-2.6.23-rc3.orig/fs/ext4/balloc.c 2007-08-21 17:31:50.000000000 -0700
+++ linux-2.6.23-rc3/fs/ext4/balloc.c 2007-08-21 17:31:51.000000000 -0700
@@ -20,6 +20,7 @@
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
+#include "group.h"
/*
* balloc.c contains the blocks allocation and deallocation routines
*/
@@ -42,6 +43,75 @@ void ext4_get_group_no_and_offset(struct
}
+/* Initializes an uninitialized block bitmap if given, and returns the
+ * number of blocks free in the group. */
+unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
+ int block_group, struct ext4_group_desc *gdp)
+{
+ unsigned long start;
+ int bit, bit_max;
+ unsigned free_blocks;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ if (bh) {
+ J_ASSERT_BH(bh, buffer_locked(bh));
+
+ /* If checksum is bad mark all blocks use to prevent allocation,
+ * essentially implementing a per-group read-only flag. */
+ if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
+ ext4_error(sb, __FUNCTION__,
+ "Checksum bad for group %u\n", block_group);
+ gdp->bg_free_blocks_count = 0;
+ gdp->bg_free_inodes_count = 0;
+ gdp->bg_itable_unused = 0;
+ memset(bh->b_data, 0xff, sb->s_blocksize);
+ return 0;
+ }
+ memset(bh->b_data, 0, sb->s_blocksize);
+ }
+
+ /* Check for superblock and gdt backups in this group */
+ bit_max = ext4_bg_has_super(sb, block_group);
+
+ if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
+ block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
+ sbi->s_desc_per_block) {
+ if (bit_max) {
+ bit_max += ext4_bg_num_gdb(sb, block_group);
+ bit_max +=le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
+ }
+ } else { /* For META_BG_BLOCK_GROUPS */
+ int group_rel = (block_group -
+ le32_to_cpu(sbi->s_es->s_first_meta_bg)) %
+ EXT4_DESC_PER_BLOCK(sb);
+ if (group_rel == 0 || group_rel == 1 ||
+ (group_rel == EXT4_DESC_PER_BLOCK(sb) - 1))
+ bit_max += 1;
+ }
+
+ /* Last and first groups are always initialized */
+ free_blocks = EXT4_BLOCKS_PER_GROUP(sb) - bit_max;
+
+ if (bh) {
+ for (bit = 0; bit < bit_max; bit++)
+ ext4_set_bit(bit, bh->b_data);
+
+ start = block_group * EXT4_BLOCKS_PER_GROUP(sb) +
+ le32_to_cpu(sbi->s_es->s_first_data_block);
+
+ /* Set bits for block and inode bitmaps, and inode table */
+ ext4_set_bit(le32_to_cpu(gdp->bg_block_bitmap) - start,
+ bh->b_data);
+ ext4_set_bit(le32_to_cpu(gdp->bg_inode_bitmap) - start,
+ bh->b_data);
+ for (bit = le32_to_cpu(gdp->bg_inode_table) - start,
+ bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++)
+ ext4_set_bit(bit, bh->b_data);
+ }
+
+ return free_blocks - sbi->s_itb_per_group - 2;
+}
+
/*
* The free blocks are managed by bitmaps. A file system contains several
* blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap
@@ -116,10 +186,22 @@ read_block_bitmap(struct super_block *sb
struct ext4_group_desc * desc;
struct buffer_head * bh = NULL;
- desc = ext4_get_group_desc (sb, block_group, NULL);
+ desc = ext4_get_group_desc(sb, block_group, NULL);
if (!desc)
goto error_out;
- bh = sb_bread(sb, ext4_block_bitmap(sb, desc));
+ if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+ bh = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap));
+ if (!buffer_uptodate(bh)) {
+ lock_buffer(bh);
+ if (!buffer_uptodate(bh)) {
+ ext4_init_block_bitmap(sb, bh,block_group,desc);
+ set_buffer_uptodate(bh);
+ }
+ unlock_buffer(bh);
+ }
+ } else {
+ bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap));
+ }
if (!bh)
ext4_error (sb, "read_block_bitmap",
"Cannot read block bitmap - "
@@ -588,6 +670,7 @@ do_more:
desc->bg_free_blocks_count =
cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) +
group_freed);
+ desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
spin_unlock(sb_bgl_lock(sbi, block_group));
percpu_counter_mod(&sbi->s_freeblocks_counter, count);
@@ -1656,8 +1739,11 @@ allocated:
ret_block, goal_hits, goal_attempts);
spin_lock(sb_bgl_lock(sbi, group_no));
+ if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
+ gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
gdp->bg_free_blocks_count =
cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num);
+ gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
spin_unlock(sb_bgl_lock(sbi, group_no));
percpu_counter_mod(&sbi->s_freeblocks_counter, -num);
Index: linux-2.6.23-rc3/fs/ext4/group.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.23-rc3/fs/ext4/group.h 2007-08-22 21:30:55.000000000 -0700
@@ -0,0 +1,29 @@
+/*
+ * linux/fs/ext4/group.h
+ *
+ * Copyright (C) 2007 Cluster File Systems, Inc
+ *
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ */
+
+#ifndef _LINUX_EXT4_GROUP_H
+#define _LINUX_EXT4_GROUP_H
+#if defined(CONFIG_CRC16)
+#include <linux/crc16.h>
+#endif
+
+extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
+ struct ext4_group_desc *gdp);
+extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
+ struct ext4_group_desc *gdp);
+struct buffer_head *read_block_bitmap(struct super_block *sb,
+ unsigned int block_group);
+extern unsigned ext4_init_block_bitmap(struct super_block *sb,
+ struct buffer_head *bh, int group,
+ struct ext4_group_desc *desc);
+#define ext4_free_blocks_after_init(sb, group, desc) \
+ ext4_init_block_bitmap(sb, NULL, group, desc)
+extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
+ struct buffer_head *bh, int group,
+ struct ext4_group_desc *desc);
+#endif /* _LINUX_EXT4_GROUP_H */
Index: linux-2.6.23-rc3/fs/ext4/ialloc.c
===================================================================
--- linux-2.6.23-rc3.orig/fs/ext4/ialloc.c 2007-08-12 21:25:24.000000000 -0700
+++ linux-2.6.23-rc3/fs/ext4/ialloc.c 2007-08-21 17:31:51.000000000 -0700
@@ -28,6 +28,7 @@
#include "xattr.h"
#include "acl.h"
+#include "group.h"
/*
* ialloc.c contains the inodes allocation and deallocation routines
@@ -43,6 +44,52 @@
* the free blocks count in the block.
*/
+/*
+ * To avoid calling the atomic setbit hundreds or thousands of times, we only
+ * need to use it within a single byte (to ensure we get endianness right).
+ * We can use memset for the rest of the bitmap as there are no other users.
+ */
+static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
+{
+ int i;
+
+ if (start_bit >= end_bit)
+ return;
+
+ ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);
+ for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++)
+ ext4_set_bit(i, bitmap);
+ if (i < end_bit)
+ memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3);
+}
+
+/* Initializes an uninitialized inode bitmap */
+unsigned ext4_init_inode_bitmap(struct super_block *sb,
+ struct buffer_head *bh, int block_group,
+ struct ext4_group_desc *gdp)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ J_ASSERT_BH(bh, buffer_locked(bh));
+
+ /* If checksum is bad mark all blocks and inodes use to prevent
+ * allocation, essentially implementing a per-group read-only flag. */
+ if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
+ ext4_error(sb, __FUNCTION__, "Checksum bad for group %u\n",
+ block_group);
+ gdp->bg_free_blocks_count = 0;
+ gdp->bg_free_inodes_count = 0;
+ gdp->bg_itable_unused = 0;
+ memset(bh->b_data, 0xff, sb->s_blocksize);
+ return 0;
+ }
+
+ memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
+ mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb),
+ bh->b_data);
+
+ return EXT4_INODES_PER_GROUP(sb);
+}
/*
* Read the inode allocation bitmap for a given block_group, reading
@@ -59,8 +106,19 @@ read_inode_bitmap(struct super_block * s
desc = ext4_get_group_desc(sb, block_group, NULL);
if (!desc)
goto error_out;
-
- bh = sb_bread(sb, ext4_inode_bitmap(sb, desc));
+ if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+ bh = sb_getblk(sb, le32_to_cpu(desc->bg_inode_bitmap));
+ if (!buffer_uptodate(bh)) {
+ lock_buffer(bh);
+ if (!buffer_uptodate(bh)) {
+ ext4_init_inode_bitmap(sb, bh,block_group,desc);
+ set_buffer_uptodate(bh);
+ }
+ unlock_buffer(bh);
+ }
+ } else {
+ bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap));
+ }
if (!bh)
ext4_error(sb, "read_inode_bitmap",
"Cannot read inode bitmap - "
@@ -169,6 +227,8 @@ void ext4_free_inode (handle_t *handle,
if (is_directory)
gdp->bg_used_dirs_count = cpu_to_le16(
le16_to_cpu(gdp->bg_used_dirs_count) - 1);
+ gdp->bg_checksum = ext4_group_desc_csum(sbi,block_group,
+ gdp);
spin_unlock(sb_bgl_lock(sbi, block_group));
percpu_counter_inc(&sbi->s_freeinodes_counter);
if (is_directory)
@@ -438,7 +498,7 @@ struct inode *ext4_new_inode(handle_t *h
struct ext4_sb_info *sbi;
int err = 0;
struct inode *ret;
- int i;
+ int i, free = 0;
/* Cannot create files in a deleted directory */
if (!dir || !dir->i_nlink)
@@ -520,11 +580,13 @@ repeat_in_this_group:
goto out;
got:
- ino += group * EXT4_INODES_PER_GROUP(sb) + 1;
- if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
- ext4_error (sb, "ext4_new_inode",
- "reserved inode or inode > inodes count - "
- "block_group = %d, inode=%lu", group, ino);
+ ino++;
+ if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
+ ino > EXT4_INODES_PER_GROUP(sb)) {
+ ext4_error(sb, __FUNCTION__,
+ "reserved inode or inode > inodes count - "
+ "block_group = %d, inode=%lu", group,
+ ino + group * EXT4_INODES_PER_GROUP(sb));
err = -EIO;
goto fail;
}
@@ -532,13 +594,65 @@ got:
BUFFER_TRACE(bh2, "get_write_access");
err = ext4_journal_get_write_access(handle, bh2);
if (err) goto fail;
+
+ /* We may have to initialize the block bitmap if it isn't already */
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
+ gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+ struct buffer_head *block_bh = read_block_bitmap(sb, group);
+
+ BUFFER_TRACE(block_bh, "get block bitmap access");
+ err = ext4_journal_get_write_access(handle, block_bh);
+ if (err) {
+ brelse(block_bh);
+ goto fail;
+ }
+
+ free = 0;
+ spin_lock(sb_bgl_lock(sbi, group));
+ /* recheck and clear flag under lock if we still need to */
+ if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+ gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
+ free = ext4_free_blocks_after_init(sb, group, gdp);
+ gdp->bg_free_blocks_count = cpu_to_le16(free);
+ }
+ spin_unlock(sb_bgl_lock(sbi, group));
+
+ /* Don't need to dirty bitmap block if we didn't change it */
+ if (free) {
+ BUFFER_TRACE(block_bh, "dirty block bitmap");
+ err = ext4_journal_dirty_metadata(handle, block_bh);
+ }
+
+ brelse(block_bh);
+ if (err)
+ goto fail;
+ }
+
spin_lock(sb_bgl_lock(sbi, group));
+ /* If we didn't allocate from within the initialized part of the inode
+ * table then we need to initialize up to this inode. */
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+ if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+ gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
+ free = EXT4_INODES_PER_GROUP(sb);
+ } else {
+ free = EXT4_INODES_PER_GROUP(sb) -
+ le16_to_cpu(gdp->bg_itable_unused);
+ }
+
+ if (ino > free) {
+ gdp->bg_itable_unused =
+ cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
+ }
+ }
+
gdp->bg_free_inodes_count =
cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
if (S_ISDIR(mode)) {
gdp->bg_used_dirs_count =
cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
}
+ gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
spin_unlock(sb_bgl_lock(sbi, group));
BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
err = ext4_journal_dirty_metadata(handle, bh2);
@@ -560,7 +674,7 @@ got:
inode->i_gid = current->fsgid;
inode->i_mode = mode;
- inode->i_ino = ino;
+ inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
/* This is the optimal IO size (for stat), not the fs block size */
inode->i_blocks = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
Index: linux-2.6.23-rc3/fs/ext4/resize.c
===================================================================
--- linux-2.6.23-rc3.orig/fs/ext4/resize.c 2007-08-12 21:25:24.000000000 -0700
+++ linux-2.6.23-rc3/fs/ext4/resize.c 2007-08-21 17:31:51.000000000 -0700
@@ -16,6 +16,7 @@
#include <linux/errno.h>
#include <linux/slab.h>
+#include "group.h"
#define outside(b, first, last) ((b) < (first) || (b) >= (last))
#define inside(b, first, last) ((b) >= (first) && (b) < (last))
@@ -842,6 +843,7 @@ int ext4_group_add(struct super_block *s
ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb));
+ gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
/*
* Make the new blocks and inodes valid next. We do this before
Index: linux-2.6.23-rc3/fs/ext4/super.c
===================================================================
--- linux-2.6.23-rc3.orig/fs/ext4/super.c 2007-08-21 17:31:50.000000000 -0700
+++ linux-2.6.23-rc3/fs/ext4/super.c 2007-08-22 21:32:27.000000000 -0700
@@ -43,6 +43,7 @@
#include "xattr.h"
#include "acl.h"
#include "namei.h"
+#include "group.h"
static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
unsigned long journal_devnum);
@@ -1271,6 +1272,94 @@ static int ext4_setup_super(struct super
return res;
}
+#if !defined(CONFIG_CRC16)
+/** CRC table for the CRC-16. The poly is 0x8005 (x16 + x15 + x2 + 1) */
+__u16 const crc16_table[256] = {
+ 0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241,
+ 0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440,
+ 0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40,
+ 0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841,
+ 0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40,
+ 0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41,
+ 0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641,
+ 0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040,
+ 0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240,
+ 0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441,
+ 0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41,
+ 0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840,
+ 0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41,
+ 0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40,
+ 0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640,
+ 0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041,
+ 0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240,
+ 0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441,
+ 0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41,
+ 0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840,
+ 0x7800, 0xB8C1, 0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41,
+ 0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40,
+ 0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640,
+ 0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041,
+ 0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0, 0x5280, 0x9241,
+ 0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481, 0x5440,
+ 0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40,
+ 0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841,
+ 0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40,
+ 0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41,
+ 0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641,
+ 0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040
+};
+
+static inline __u16 crc16_byte(__u16 crc, const __u8 data)
+{
+ return (crc >> 8) ^ crc16_table[(crc ^ data) & 0xff];
+}
+
+__u16 crc16(__u16 crc, __u8 const *buffer, size_t len)
+{
+ while (len--)
+ crc = crc16_byte(crc, *buffer++);
+ return crc;
+}
+#endif
+
+__le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
+ struct ext4_group_desc *gdp)
+{
+ __u16 crc = 0;
+
+ if (sbi->s_es->s_feature_ro_compat &
+ cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+ int offset = offsetof(struct ext4_group_desc, bg_checksum);
+ __le32 le_group = cpu_to_le32(block_group);
+
+ crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
+ crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
+ crc = crc16(crc, (__u8 *)gdp, offset);
+ offset += sizeof(gdp->bg_checksum); /* skip checksum */
+ /* for checksum of struct ext4_group_desc do the rest...*/
+ if ((sbi->s_es->s_feature_incompat &
+ cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
+ offset < le16_to_cpu(sbi->s_es->s_desc_size)) {
+ crc = crc16(crc, (__u8 *)gdp + offset,
+ le16_to_cpu(sbi->s_es->s_desc_size)
+ - offset);
+ }
+ }
+
+ return cpu_to_le16(crc);
+}
+
+int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
+ struct ext4_group_desc *gdp)
+{
+ if ((sbi->s_es->s_feature_ro_compat &
+ cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) &&
+ (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp)))
+ return 0;
+
+ return 1;
+}
+
/* Called at mount-time, super-block is locked */
static int ext4_check_descriptors (struct super_block * sb)
{
@@ -1325,6 +1414,13 @@ static int ext4_check_descriptors (struc
i, inode_table);
return 0;
}
+ if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
+ ext4_error(sb, __FUNCTION__,
+ "Checksum for group %d failed (%u!=%u)\n", i,
+ le16_to_cpu(ext4_group_desc_csum(sbi,i,gdp)),
+ le16_to_cpu(gdp->bg_checksum));
+ return 0;
+ }
first_block += EXT4_BLOCKS_PER_GROUP(sb);
gdp = (struct ext4_group_desc *)
((__u8 *)gdp + EXT4_DESC_SIZE(sb));
Index: linux-2.6.23-rc3/include/linux/ext4_fs.h
===================================================================
--- linux-2.6.23-rc3.orig/include/linux/ext4_fs.h 2007-08-21 17:31:50.000000000 -0700
+++ linux-2.6.23-rc3/include/linux/ext4_fs.h 2007-08-21 17:31:51.000000000 -0700
@@ -150,19 +150,25 @@ struct ext4_allocation_request {
*/
struct ext4_group_desc
{
- __le32 bg_block_bitmap; /* Blocks bitmap block */
- __le32 bg_inode_bitmap; /* Inodes bitmap block */
+ __le32 bg_block_bitmap; /* Blocks bitmap block */
+ __le32 bg_inode_bitmap; /* Inodes bitmap block */
__le32 bg_inode_table; /* Inodes table block */
__le16 bg_free_blocks_count; /* Free blocks count */
__le16 bg_free_inodes_count; /* Free inodes count */
__le16 bg_used_dirs_count; /* Directories count */
__u16 bg_flags;
- __u32 bg_reserved[3];
+ __u32 bg_reserved[2];
+ __le16 bg_itable_unused; /* Unused inodes count */
+ __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */
__le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */
__le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */
__le32 bg_inode_table_hi; /* Inodes table block MSB */
};
+#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */
+#define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */
+#define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */
+
#ifdef __KERNEL__
#include <linux/ext4_fs_i.h>
#include <linux/ext4_fs_sb.h>
@@ -734,6 +740,7 @@ static inline int ext4_valid_inum(struct
#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
#define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
+#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010
#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020
#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
@@ -755,6 +762,7 @@ static inline int ext4_valid_inum(struct
EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
+ EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
/*
^ permalink raw reply [flat|nested] 3+ messages in thread[parent not found: <20070828201840.GX5377@schatzie.adilger.int>]
* Re: [PATCH] uninitialized block groups [not found] ` <20070828201840.GX5377@schatzie.adilger.int> @ 2007-08-31 0:46 ` Avantika Mathur 2007-09-07 23:06 ` Avantika Mathur 1 sibling, 0 replies; 3+ messages in thread From: Avantika Mathur @ 2007-08-31 0:46 UTC (permalink / raw) To: Andreas Dilger; +Cc: linux-ext4 [-- Attachment #1: Type: text/plain, Size: 718 bytes --] Andreas Dilger wrote: > On Aug 22, 2007 22:41 -0700, Avantika Mathur wrote: > >> Below is an updated version of the uninitialized block groups patch, >> ported to the end of the patch queue. There are small changes to the >> previous version: >> - consistent format of the group_desc structure with the e2fsprogs patches >> - do not verify group_desc checksums if the feature is not enabled >> > > Can you please keep the original description intact with the patch, so > it is useful for Andrew when it is submitted upstream: Thank you for the comments Andreas. Here is the patch updated with your comments, and ported to the latest ext4 patch queue after the journal_checkusm patches. Avantika --- [-- Attachment #2: ext4_uninit_blockgroup.patch --] [-- Type: text/x-patch, Size: 21471 bytes --] From: Andreas Dilger <adilger@clusterfs.com> In pass1 of e2fsck, every inode table is scanned and checked, regardless of whether it is in use. This is this most time consuming part of the filesystem check. The unintialized block group feature can greatly reduce e2fsck time by eliminating checking of uninitialized inodes. There is a high water mark of used inodes for each block group. Block and inode bitmaps can be uninitialized on disk via a flag in the group descriptor to avoid reading or scanning them at e2fsck time. A checksum of each group descriptor is used to ensure that corruption in the group descriptor's bit flags does not cause incorrect operation. Signed-off-by: Avantika Mathur <mathur@us.ibm.com> Signed-off-by: Andreas Dilger <adilger@clusterfs.com> Signed-off-by: Mingming Cao <cmm@us.ibm.com> --- fs/ext4/balloc.c | 94 +++++++++++++++++++++++++++++++++- fs/ext4/group.h | 29 ++++++++++ fs/ext4/ialloc.c | 132 ++++++++++++++++++++++++++++++++++++++++++++---- fs/ext4/resize.c | 2 fs/ext4/super.c | 96 ++++++++++++++++++++++++++++++++++ include/linux/ext4_fs.h | 14 ++++- 6 files changed, 352 insertions(+), 15 deletions(-) Index: linux-2.6.23-rc4/fs/ext4/balloc.c =================================================================== --- linux-2.6.23-rc4.orig/fs/ext4/balloc.c 2007-08-27 18:32:35.000000000 -0700 +++ linux-2.6.23-rc4/fs/ext4/balloc.c 2007-08-30 17:26:22.000000000 -0700 @@ -20,6 +20,7 @@ #include <linux/quotaops.h> #include <linux/buffer_head.h> +#include "group.h" /* * balloc.c contains the blocks allocation and deallocation routines */ @@ -42,6 +43,74 @@ } +/* Initializes an uninitialized block bitmap if given, and returns the + * number of blocks free in the group. */ +unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, + int block_group, struct ext4_group_desc *gdp) +{ + unsigned long start; + int bit, bit_max; + unsigned free_blocks; + struct ext4_sb_info *sbi = EXT4_SB(sb); + + if (bh) { + J_ASSERT_BH(bh, buffer_locked(bh)); + + /* If checksum is bad mark all blocks used to prevent allocation + * essentially implementing a per-group read-only flag. */ + if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { + ext4_error(sb, __FUNCTION__, + "Checksum bad for group %u\n", block_group); + gdp->bg_free_blocks_count = 0; + gdp->bg_free_inodes_count = 0; + gdp->bg_itable_unused = 0; + memset(bh->b_data, 0xff, sb->s_blocksize); + return 0; + } + memset(bh->b_data, 0, sb->s_blocksize); + } + + /* Check for superblock and gdt backups in this group */ + bit_max = ext4_bg_has_super(sb, block_group); + + if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || + block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) * + sbi->s_desc_per_block) { + if (bit_max) { + bit_max += ext4_bg_num_gdb(sb, block_group); + bit_max += + le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); + } + } else { /* For META_BG_BLOCK_GROUPS */ + int group_rel = (block_group - + le32_to_cpu(sbi->s_es->s_first_meta_bg)) % + EXT4_DESC_PER_BLOCK(sb); + if (group_rel == 0 || group_rel == 1 || + (group_rel == EXT4_DESC_PER_BLOCK(sb) - 1)) + bit_max += 1; + } + + /* Last and first groups are always initialized */ + free_blocks = EXT4_BLOCKS_PER_GROUP(sb) - bit_max; + + if (bh) { + for (bit = 0; bit < bit_max; bit++) + ext4_set_bit(bit, bh->b_data); + + start = block_group * EXT4_BLOCKS_PER_GROUP(sb) + + le32_to_cpu(sbi->s_es->s_first_data_block); + + /* Set bits for block and inode bitmaps, and inode table */ + ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data); + ext4_set_bit(ext4_inode_bitmap(sb, gdp) - start, bh->b_data); + for (bit = le32_to_cpu(gdp->bg_inode_table) - start, + bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++) + ext4_set_bit(bit, bh->b_data); + } + + return free_blocks - sbi->s_itb_per_group - 2; +} + /* * The free blocks are managed by bitmaps. A file system contains several * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap @@ -110,16 +179,29 @@ * * Return buffer_head on success or NULL in case of failure. */ -static struct buffer_head * +struct buffer_head * read_block_bitmap(struct super_block *sb, unsigned int block_group) { struct ext4_group_desc * desc; struct buffer_head * bh = NULL; - desc = ext4_get_group_desc (sb, block_group, NULL); + desc = ext4_get_group_desc(sb, block_group, NULL); if (!desc) goto error_out; - bh = sb_bread(sb, ext4_block_bitmap(sb, desc)); + if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + bh = sb_getblk(sb, ext4_block_bitmap(sb, desc)); + if (!buffer_uptodate(bh)) { + lock_buffer(bh); + if (!buffer_uptodate(bh)) { + ext4_init_block_bitmap(sb, bh, block_group, + desc); + set_buffer_uptodate(bh); + } + unlock_buffer(bh); + } + } else { + bh = sb_bread(sb, ext4_block_bitmap(sb,desc)); + } if (!bh) ext4_error (sb, "read_block_bitmap", "Cannot read block bitmap - " @@ -586,6 +668,7 @@ desc->bg_free_blocks_count = cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) + group_freed); + desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); spin_unlock(sb_bgl_lock(sbi, block_group)); percpu_counter_mod(&sbi->s_freeblocks_counter, count); @@ -1644,8 +1727,11 @@ ret_block, goal_hits, goal_attempts); spin_lock(sb_bgl_lock(sbi, group_no)); + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); gdp->bg_free_blocks_count = cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num); + gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); spin_unlock(sb_bgl_lock(sbi, group_no)); percpu_counter_mod(&sbi->s_freeblocks_counter, -num); Index: linux-2.6.23-rc4/fs/ext4/group.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.23-rc4/fs/ext4/group.h 2007-08-30 17:02:37.000000000 -0700 @@ -0,0 +1,29 @@ +/* + * linux/fs/ext4/group.h + * + * Copyright (C) 2007 Cluster File Systems, Inc + * + * Author: Andreas Dilger <adilger@clusterfs.com> + */ + +#ifndef _LINUX_EXT4_GROUP_H +#define _LINUX_EXT4_GROUP_H +#if defined(CONFIG_CRC16) +#include <linux/crc16.h> +#endif + +extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group, + struct ext4_group_desc *gdp); +extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group, + struct ext4_group_desc *gdp); +struct buffer_head *read_block_bitmap(struct super_block *sb, + unsigned int block_group); +extern unsigned ext4_init_block_bitmap(struct super_block *sb, + struct buffer_head *bh, int group, + struct ext4_group_desc *desc); +#define ext4_free_blocks_after_init(sb, group, desc) \ + ext4_init_block_bitmap(sb, NULL, group, desc) +extern unsigned ext4_init_inode_bitmap(struct super_block *sb, + struct buffer_head *bh, int group, + struct ext4_group_desc *desc); +#endif /* _LINUX_EXT4_GROUP_H */ Index: linux-2.6.23-rc4/fs/ext4/ialloc.c =================================================================== --- linux-2.6.23-rc4.orig/fs/ext4/ialloc.c 2007-08-27 18:32:35.000000000 -0700 +++ linux-2.6.23-rc4/fs/ext4/ialloc.c 2007-08-30 17:02:37.000000000 -0700 @@ -28,6 +28,7 @@ #include "xattr.h" #include "acl.h" +#include "group.h" /* * ialloc.c contains the inodes allocation and deallocation routines @@ -43,6 +44,52 @@ * the free blocks count in the block. */ +/* + * To avoid calling the atomic setbit hundreds or thousands of times, we only + * need to use it within a single byte (to ensure we get endianness right). + * We can use memset for the rest of the bitmap as there are no other users. + */ +static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) +{ + int i; + + if (start_bit >= end_bit) + return; + + ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit); + for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) + ext4_set_bit(i, bitmap); + if (i < end_bit) + memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); +} + +/* Initializes an uninitialized inode bitmap */ +unsigned ext4_init_inode_bitmap(struct super_block *sb, + struct buffer_head *bh, int block_group, + struct ext4_group_desc *gdp) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + + J_ASSERT_BH(bh, buffer_locked(bh)); + + /* If checksum is bad mark all blocks and inodes use to prevent + * allocation, essentially implementing a per-group read-only flag. */ + if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { + ext4_error(sb, __FUNCTION__, "Checksum bad for group %u\n", + block_group); + gdp->bg_free_blocks_count = 0; + gdp->bg_free_inodes_count = 0; + gdp->bg_itable_unused = 0; + memset(bh->b_data, 0xff, sb->s_blocksize); + return 0; + } + + memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); + mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb), + bh->b_data); + + return EXT4_INODES_PER_GROUP(sb); +} /* * Read the inode allocation bitmap for a given block_group, reading @@ -59,8 +106,20 @@ desc = ext4_get_group_desc(sb, block_group, NULL); if (!desc) goto error_out; - - bh = sb_bread(sb, ext4_inode_bitmap(sb, desc)); + if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { + bh = sb_getblk(sb, ext4_inode_bitmap(sb, desc)); + if (!buffer_uptodate(bh)) { + lock_buffer(bh); + if (!buffer_uptodate(bh)) { + ext4_init_inode_bitmap(sb, bh, block_group, + desc); + set_buffer_uptodate(bh); + } + unlock_buffer(bh); + } + } else { + bh = sb_bread(sb, ext4_inode_bitmap(sb, desc)); + } if (!bh) ext4_error(sb, "read_inode_bitmap", "Cannot read inode bitmap - " @@ -169,6 +228,8 @@ if (is_directory) gdp->bg_used_dirs_count = cpu_to_le16( le16_to_cpu(gdp->bg_used_dirs_count) - 1); + gdp->bg_checksum = ext4_group_desc_csum(sbi, + block_group, gdp); spin_unlock(sb_bgl_lock(sbi, block_group)); percpu_counter_inc(&sbi->s_freeinodes_counter); if (is_directory) @@ -438,7 +499,7 @@ struct ext4_sb_info *sbi; int err = 0; struct inode *ret; - int i; + int i, free = 0; /* Cannot create files in a deleted directory */ if (!dir || !dir->i_nlink) @@ -520,11 +581,13 @@ goto out; got: - ino += group * EXT4_INODES_PER_GROUP(sb) + 1; - if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { - ext4_error (sb, "ext4_new_inode", - "reserved inode or inode > inodes count - " - "block_group = %d, inode=%lu", group, ino); + ino++; + if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || + ino > EXT4_INODES_PER_GROUP(sb)) { + ext4_error(sb, __FUNCTION__, + "reserved inode or inode > inodes count - " + "block_group = %d, inode=%lu", group, + ino + group * EXT4_INODES_PER_GROUP(sb)); err = -EIO; goto fail; } @@ -532,13 +595,64 @@ BUFFER_TRACE(bh2, "get_write_access"); err = ext4_journal_get_write_access(handle, bh2); if (err) goto fail; + + /* We may have to initialize the block bitmap if it isn't already */ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) && + gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + struct buffer_head *block_bh = read_block_bitmap(sb, group); + + BUFFER_TRACE(block_bh, "get block bitmap access"); + err = ext4_journal_get_write_access(handle, block_bh); + if (err) { + brelse(block_bh); + goto fail; + } + + free = 0; + spin_lock(sb_bgl_lock(sbi, group)); + /* recheck and clear flag under lock if we still need to */ + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); + free = ext4_free_blocks_after_init(sb, group, gdp); + gdp->bg_free_blocks_count = cpu_to_le16(free); + } + spin_unlock(sb_bgl_lock(sbi, group)); + + /* Don't need to dirty bitmap block if we didn't change it */ + if (free) { + BUFFER_TRACE(block_bh, "dirty block bitmap"); + err = ext4_journal_dirty_metadata(handle, block_bh); + } + + brelse(block_bh); + if (err) + goto fail; + } + spin_lock(sb_bgl_lock(sbi, group)); + /* If we didn't allocate from within the initialized part of the inode + * table then we need to initialize up to this inode. */ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT); + free = EXT4_INODES_PER_GROUP(sb); + } else { + free = EXT4_INODES_PER_GROUP(sb) - + le16_to_cpu(gdp->bg_itable_unused); + } + + if (ino > free) + gdp->bg_itable_unused = + cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino); + } + gdp->bg_free_inodes_count = cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1); if (S_ISDIR(mode)) { gdp->bg_used_dirs_count = cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1); } + gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); spin_unlock(sb_bgl_lock(sbi, group)); BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata"); err = ext4_journal_dirty_metadata(handle, bh2); @@ -560,7 +674,7 @@ inode->i_gid = current->fsgid; inode->i_mode = mode; - inode->i_ino = ino; + inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb); /* This is the optimal IO size (for stat), not the fs block size */ inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime = Index: linux-2.6.23-rc4/fs/ext4/resize.c =================================================================== --- linux-2.6.23-rc4.orig/fs/ext4/resize.c 2007-08-27 18:32:35.000000000 -0700 +++ linux-2.6.23-rc4/fs/ext4/resize.c 2007-08-30 17:02:37.000000000 -0700 @@ -16,6 +16,7 @@ #include <linux/errno.h> #include <linux/slab.h> +#include "group.h" #define outside(b, first, last) ((b) < (first) || (b) >= (last)) #define inside(b, first, last) ((b) >= (first) && (b) < (last)) @@ -842,6 +843,7 @@ ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb)); + gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); /* * Make the new blocks and inodes valid next. We do this before Index: linux-2.6.23-rc4/fs/ext4/super.c =================================================================== --- linux-2.6.23-rc4.orig/fs/ext4/super.c 2007-08-30 17:02:36.000000000 -0700 +++ linux-2.6.23-rc4/fs/ext4/super.c 2007-08-30 17:02:37.000000000 -0700 @@ -43,6 +43,7 @@ #include "xattr.h" #include "acl.h" #include "namei.h" +#include "group.h" static int ext4_load_journal(struct super_block *, struct ext4_super_block *, unsigned long journal_devnum); @@ -1247,6 +1248,93 @@ return res; } +#if !defined(CONFIG_CRC16) +/** CRC table for the CRC-16. The poly is 0x8005 (x16 + x15 + x2 + 1) */ +__u16 const crc16_table[256] = { + 0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241, + 0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440, + 0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40, + 0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841, + 0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40, + 0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41, + 0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641, + 0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040, + 0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240, + 0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441, + 0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41, + 0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840, + 0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41, + 0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40, + 0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640, + 0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041, + 0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240, + 0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441, + 0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41, + 0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840, + 0x7800, 0xB8C1, 0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41, + 0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40, + 0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640, + 0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041, + 0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0, 0x5280, 0x9241, + 0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481, 0x5440, + 0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40, + 0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841, + 0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40, + 0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41, + 0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641, + 0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040 +}; + +static inline __u16 crc16_byte(__u16 crc, const __u8 data) +{ + return (crc >> 8) ^ crc16_table[(crc ^ data) & 0xff]; +} + +__u16 crc16(__u16 crc, __u8 const *buffer, size_t len) +{ + while (len--) + crc = crc16_byte(crc, *buffer++); + return crc; +} +#endif + +__le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, + struct ext4_group_desc *gdp) +{ + __u16 crc = 0; + + if (sbi->s_es->s_feature_ro_compat & + cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { + int offset = offsetof(struct ext4_group_desc, bg_checksum); + __le32 le_group = cpu_to_le32(block_group); + + crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); + crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); + crc = crc16(crc, (__u8 *)gdp, offset); + offset += sizeof(gdp->bg_checksum); /* skip checksum */ + /* for checksum of struct ext4_group_desc do the rest...*/ + if ((sbi->s_es->s_feature_incompat & + cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && + offset < le16_to_cpu(sbi->s_es->s_desc_size)) + crc = crc16(crc, (__u8 *)gdp + offset, + le16_to_cpu(sbi->s_es->s_desc_size) - + offset); + } + + return cpu_to_le16(crc); +} + +int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group, + struct ext4_group_desc *gdp) +{ + if ((sbi->s_es->s_feature_ro_compat & + cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) && + (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp))) + return 0; + + return 1; +} + /* Called at mount-time, super-block is locked */ static int ext4_check_descriptors (struct super_block * sb) { @@ -1301,6 +1389,14 @@ i, inode_table); return 0; } + if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { + ext4_error(sb, __FUNCTION__, + "Checksum for group %d failed (%u!=%u)\n", i, + le16_to_cpu(ext4_group_desc_csum(sbi, i, + gdp)), + le16_to_cpu(gdp->bg_checksum)); + return 0; + } first_block += EXT4_BLOCKS_PER_GROUP(sb); gdp = (struct ext4_group_desc *) ((__u8 *)gdp + EXT4_DESC_SIZE(sb)); Index: linux-2.6.23-rc4/include/linux/ext4_fs.h =================================================================== --- linux-2.6.23-rc4.orig/include/linux/ext4_fs.h 2007-08-30 17:02:36.000000000 -0700 +++ linux-2.6.23-rc4/include/linux/ext4_fs.h 2007-08-30 17:02:37.000000000 -0700 @@ -123,19 +123,25 @@ */ struct ext4_group_desc { - __le32 bg_block_bitmap; /* Blocks bitmap block */ - __le32 bg_inode_bitmap; /* Inodes bitmap block */ + __le32 bg_block_bitmap; /* Blocks bitmap block */ + __le32 bg_inode_bitmap; /* Inodes bitmap block */ __le32 bg_inode_table; /* Inodes table block */ __le16 bg_free_blocks_count; /* Free blocks count */ __le16 bg_free_inodes_count; /* Free inodes count */ __le16 bg_used_dirs_count; /* Directories count */ - __u16 bg_flags; - __u32 bg_reserved[3]; + __u16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */ + __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */ + __le16 bg_itable_unused; /* Unused inodes count */ + __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */ __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */ __le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */ __le32 bg_inode_table_hi; /* Inodes table block MSB */ }; +#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ +#define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ +#define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ + #ifdef __KERNEL__ #include <linux/ext4_fs_i.h> #include <linux/ext4_fs_sb.h> @@ -693,6 +699,7 @@ #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 #define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 +#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010 #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 @@ -712,6 +719,7 @@ EXT4_FEATURE_INCOMPAT_64BIT) #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \ EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ EXT4_FEATURE_RO_COMPAT_BTREE_DIR) ^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] uninitialized block groups [not found] ` <20070828201840.GX5377@schatzie.adilger.int> 2007-08-31 0:46 ` Avantika Mathur @ 2007-09-07 23:06 ` Avantika Mathur 1 sibling, 0 replies; 3+ messages in thread From: Avantika Mathur @ 2007-09-07 23:06 UTC (permalink / raw) To: Andreas Dilger; +Cc: linux-ext4 [-- Attachment #1: Type: text/plain, Size: 367 bytes --] Andreas Dilger wrote: > One last note - if this is being applied on top of mballoc it needs one > last patch for that file (against the 2.6.18-rhel5 tree, should work > with s/ext3/ext4/ I think): > Here is the patch for uninitialized block group support in mballoc ported to ext4; in the patch queue after mballoc-mkdir-oops-fix.patch. Thanks Avantika ---- [-- Attachment #2: mballoc-uninit-fix --] [-- Type: text/plain, Size: 3583 bytes --] Signed-off-by: Andreas Dilger <adilger@clusterfs.com> Index: linux-2.6.23-rc4/fs/ext4/mballoc.c =================================================================== --- linux-2.6.23-rc4.orig/fs/ext4/mballoc.c 2007-09-05 13:57:53.000000000 -0700 +++ linux-2.6.23-rc4/fs/ext4/mballoc.c 2007-09-05 13:59:51.000000000 -0700 @@ -36,6 +36,8 @@ #include <linux/seq_file.h> #include <linux/version.h> +#include "group.h" + /* * MUSTDO: * - test ext4_ext_search_left() and ext4_ext_search_right() @@ -321,6 +323,7 @@ unsigned long bb_state; unsigned long bb_tid; struct ext4_free_metadata *bb_md_cur; + struct ext4_group_desc *bb_gdp; unsigned short bb_first_free; unsigned short bb_free; unsigned short bb_fragments; @@ -921,10 +924,7 @@ if (first_group + i >= EXT4_SB(sb)->s_groups_count) break; - err = -EIO; - desc = ext4_get_group_desc(sb, first_group + i, NULL); - if (desc == NULL) - goto out; + desc = EXT4_GROUP_INFO(sb, first_group + i)->bb_gdp; err = -ENOMEM; bh[i] = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap)); @@ -939,7 +939,12 @@ unlock_buffer(bh[i]); continue; } - + if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + ext4_init_block_bitmap(sb, bh[i], first_group + i,desc); + set_buffer_uptodate(bh[i]); + unlock_buffer(bh[i]); + continue; + } get_bh(bh[i]); bh[i]->b_end_io = end_buffer_read_sync; submit_bh(READ, bh[i]); @@ -1722,6 +1727,10 @@ switch (cr) { case 0: BUG_ON(ac->ac_2order == 0); + /* If this group is uninitialized, skip it initially */ + if (grp->bb_gdp->bg_flags & + cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) + return 0; bits = ac->ac_sb->s_blocksize_bits + 1; for (i = ac->ac_2order; i <= bits; i++) if (grp->bb_counters[i] > 0) @@ -1825,7 +1834,9 @@ } ac->ac_groups_scanned++; - if (cr == 0) + if (cr == 0 || (e3b.bd_info->bb_gdp->bg_flags & + cpu_to_le16(EXT4_BG_BLOCK_UNINIT) && + ac->ac_2order != 0)) ext4_mb_simple_scan_group(ac, &e3b); else if (cr == 1 && ac->ac_g_ex.fe_len == sbi->s_stripe) ext4_mb_scan_aligned(ac, &e3b); @@ -2304,12 +2315,13 @@ i--; goto err_freebuddy; } + memset(meta_group_info[j], 0, len); desc = ext4_get_group_desc(sb, i, NULL); + meta_group_info[j]->bb_gdp = desc; if (desc == NULL) { printk(KERN_ERR"EXT4-fs: can't read descriptor %u\n",i); goto err_freebuddy; } - memset(meta_group_info[j], 0, len); set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &meta_group_info[j]->bb_state); @@ -2874,9 +2886,17 @@ mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); + gdp->bg_free_blocks_count = + cpu_to_le16(ext4_free_blocks_after_init(sb, + ac->ac_b_ex.fe_group, + gdp)); + } gdp->bg_free_blocks_count = cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - ac->ac_b_ex.fe_len); + gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); percpu_counter_mod(&sbi->s_freeblocks_counter, - ac->ac_b_ex.fe_len); @@ -4256,6 +4276,7 @@ spin_lock(sb_bgl_lock(sbi, block_group)); gdp->bg_free_blocks_count = cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count); + gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); spin_unlock(sb_bgl_lock(sbi, block_group)); percpu_counter_mod(&sbi->s_freeblocks_counter, count); ^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2007-09-07 23:05 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-08-23 5:41 [PATCH] uninitialized block groups Avantika Mathur
[not found] ` <20070828201840.GX5377@schatzie.adilger.int>
2007-08-31 0:46 ` Avantika Mathur
2007-09-07 23:06 ` Avantika Mathur
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).