public inbox for linux-ext4@vger.kernel.org
 help / color / mirror / Atom feed
* Patch queue update
@ 2007-12-24  6:30 Aneesh Kumar K.V
  0 siblings, 0 replies; 11+ messages in thread
From: Aneesh Kumar K.V @ 2007-12-24  6:30 UTC (permalink / raw)
  To: Mingming Cao; +Cc: ext4 development

Hi Mingming,

I have placed the updated patch queue at

http://www.radian.org/~kvaneesh/ext4/dec-24-2007/

The .tar can be found at

http://www.radian.org/~kvaneesh/ext4/dec-24-2007/patch-series.tar

The changes involve the below attached diff. I also updated the commit message
of mballoc core patch.

a) remove the ext-truncate-mutex.patch
b) Add the truncate mutex to read write semaphore conversion patch set.
    This is added at the top of unstable series expecting that it can be
    pushed to stable soon. 
c) Update mballoc and migrate patch to use i_data_sem.
d) mballoc soft lockup fix.
e) update mballoc core patch commit message.
f) Update the enable-delalloc-and-mballoc.patch patch to enable write back
   mode by default. Delalloc only supports write back.


diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index abc8900..643046b 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -526,7 +526,7 @@ static inline int rsv_is_empty(struct ext4_reserve_window *rsv)
  * when setting the reservation window size through ioctl before the file
  * is open for write (needs block allocation).
  *
- * Needs truncate_mutex protection prior to call this function.
+ * Needs down_write(i_data_sem) protection prior to call this function.
  */
 void ext4_init_block_alloc_info(struct inode *inode)
 {
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index ff83982..99e539d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1706,7 +1706,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
  * This routine returns max. credits that the extent tree can consume.
  * It should be OK for low-performance paths like ->writepage()
  * To allow many writing processes to fit into a single transaction,
- * the caller should calculate credits under truncate_mutex and
+ * the caller should calculate credits under i_data_sem and
  * pass the actual path.
  */
 int ext4_ext_calc_credits_for_insert(struct inode *inode,
@@ -2272,7 +2272,8 @@ out:
 
 /*
  * Need to be called with
- * mutex_lock(&EXT4_I(inode)->truncate_mutex);
+ * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
+ * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
  */
 int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 			ext4_lblk_t iblock,
@@ -2514,7 +2515,7 @@ void ext4_ext_truncate(struct inode * inode, struct page *page)
 	if (page)
 		ext4_block_truncate_page(handle, page, mapping, inode->i_size);
 
-	mutex_lock(&EXT4_I(inode)->truncate_mutex);
+	down_write(&EXT4_I(inode)->i_data_sem);
 	ext4_ext_invalidate_cache(inode);
 
 	ext4_mb_discard_inode_preallocations(inode);
@@ -2552,7 +2553,7 @@ out_stop:
 	if (inode->i_nlink)
 		ext4_orphan_del(handle, inode);
 
-	mutex_unlock(&EXT4_I(inode)->truncate_mutex);
+	up_write(&EXT4_I(inode)->i_data_sem);
 	ext4_journal_stop(handle);
 }
 
@@ -2616,6 +2617,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
 	 * modify 1 super block, 1 block bitmap and 1 group descriptor.
 	 */
 	credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3;
+	down_write((&EXT4_I(inode)->i_data_sem));
 retry:
 	while (ret >= 0 && ret < max_blocks) {
 		block = block + ret;
@@ -2672,6 +2674,7 @@ retry:
 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
 
+	up_write((&EXT4_I(inode)->i_data_sem));
 	/*
 	 * Time to update the file size.
 	 * Update only when preallocation was requested beyond the file size.
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index a6b2aa1..ac35ec5 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -37,9 +37,9 @@ static int ext4_release_file (struct inode * inode, struct file * filp)
 	if ((filp->f_mode & FMODE_WRITE) &&
 			(atomic_read(&inode->i_writecount) == 1))
 	{
-		mutex_lock(&EXT4_I(inode)->truncate_mutex);
+		down_write(&EXT4_I(inode)->i_data_sem);
 		ext4_discard_reservation(inode);
-		mutex_unlock(&EXT4_I(inode)->truncate_mutex);
+		up_write(&EXT4_I(inode)->i_data_sem);
 	}
 	if (is_dx(inode) && filp->private_data)
 		ext4_htree_free_dir_info(filp->private_data);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index caec966..33018a5 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -346,7 +346,7 @@ static int ext4_block_to_path(struct inode *inode,
  *	the whole chain, all way to the data (returns %NULL, *err == 0).
  *
  *      Need to be called with
- *      mutex_lock(&EXT4_I(inode)->truncate_mutex)
+ *      down_read(&EXT4_I(inode)->i_data_sem)
  */
 static Indirect *ext4_get_branch(struct inode *inode, int depth,
 				 ext4_lblk_t  *offsets,
@@ -778,7 +778,8 @@ err_out:
  *
  *
  * Need to be called with
- * mutex_lock(&EXT4_I(inode)->truncate_mutex)
+ * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
+ * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
  */
 int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
 		ext4_lblk_t iblock, unsigned long maxblocks,
@@ -866,7 +867,7 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
 		err = ext4_splice_branch(handle, inode, iblock,
 					partial, indirect_blks, count);
 	/*
-	 * i_disksize growing is protected by truncate_mutex.  Don't forget to
+	 * i_disksize growing is protected by i_data_sem.  Don't forget to
 	 * protect it if you're about to implement concurrent
 	 * ext4_get_block() -bzzz
 	*/
@@ -896,6 +897,46 @@ out:
 
 #define DIO_CREDITS (EXT4_RESERVE_TRANS_BLOCKS + 32)
 
+int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
+			unsigned long max_blocks, struct buffer_head *bh,
+			int create, int extend_disksize)
+{
+	int retval;
+	/*
+	 * Try to see if we can get  the block without requesting
+	 * for new file system block.
+	 */
+	down_read((&EXT4_I(inode)->i_data_sem));
+	if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
+		retval =  ext4_ext_get_blocks(handle, inode, block, max_blocks,
+				bh, 0, 0);
+	} else {
+		retval =  ext4_get_blocks_handle(handle, inode, block, max_blocks,
+				bh, 0, 0);
+	}
+	up_read((&EXT4_I(inode)->i_data_sem));
+	if (!create || (retval > 0))
+		return retval;
+
+	/*
+	 * We need to allocate new blocks which will result
+	 * in i_data update
+	 */
+	down_write((&EXT4_I(inode)->i_data_sem));
+	/*
+	 * We need to check for EXT4 here because migrate
+	 * could have changed the inode type in between
+	 */
+	if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
+		retval =  ext4_ext_get_blocks(handle, inode, block, max_blocks,
+				bh, create, extend_disksize);
+	} else {
+		retval =  ext4_get_blocks_handle(handle, inode, block, max_blocks,
+				bh, create, extend_disksize);
+	}
+	up_write((&EXT4_I(inode)->i_data_sem));
+	return retval;
+}
 static int ext4_get_block(struct inode *inode, sector_t iblock,
 			struct buffer_head *bh_result, int create)
 {
@@ -1386,10 +1427,10 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
 			/*
 			 * XXX: replace with spinlock if seen contended -bzzz
 			 */
-			mutex_lock(&EXT4_I(inode)->truncate_mutex);
+			down_write(&EXT4_I(inode)->i_data_sem);
 			if (disksize > EXT4_I(inode)->i_disksize)
 				EXT4_I(inode)->i_disksize = disksize;
-			mutex_unlock(&EXT4_I(inode)->truncate_mutex);
+			up_write(&EXT4_I(inode)->i_data_sem);
 
 			if (EXT4_I(inode)->i_disksize == disksize) {
 				if (handle == NULL)
@@ -1540,7 +1581,7 @@ static int jbd2_journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh)
  *	ext4_file_write() -> generic_file_write() -> __alloc_pages() -> ...
  *
  * Same applies to ext4_get_block().  We will deadlock on various things like
- * lock_journal and i_truncate_mutex.
+ * lock_journal and i_data_sem
  *
  * Setting PF_MEMALLOC here doesn't work - too many internal memory
  * allocations fail.
@@ -2484,7 +2525,7 @@ void ext4_truncate(struct inode *inode)
 	 * From here we block out all ext4_get_block() callers who want to
 	 * modify the block allocation tree.
 	 */
-	mutex_lock(&ei->truncate_mutex);
+	down_write(&ei->i_data_sem);
 
 	if (n == 1) {		/* direct blocks */
 		ext4_free_data(handle, inode, NULL, i_data+offsets[0],
@@ -2548,7 +2589,7 @@ do_indirects:
 
 	ext4_discard_reservation(inode);
 
-	mutex_unlock(&ei->truncate_mutex);
+	up_write(&ei->i_data_sem);
 	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 	ext4_mark_inode_dirty(handle, inode);
 
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 3a5327e..2ed7c37 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -199,7 +199,7 @@ flags_err:
 		 * need to allocate reservation structure for this inode
 		 * before set the window size
 		 */
-		mutex_lock(&ei->truncate_mutex);
+		down_write(&ei->i_data_sem);
 		if (!ei->i_block_alloc_info)
 			ext4_init_block_alloc_info(inode);
 
@@ -207,7 +207,7 @@ flags_err:
 			struct ext4_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node;
 			rsv->rsv_goal_size = rsv_window_size;
 		}
-		mutex_unlock(&ei->truncate_mutex);
+		up_write(&ei->i_data_sem);
 		return 0;
 	}
 	case EXT4_IOC_GROUP_EXTEND: {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 72e1920..cbc8143 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -131,7 +131,7 @@
  *      the simplest way would be to have buddy initialized by the discard
  *  - new PA vs.
  *    - use inode PA
- *      i_truncate_mutex serializes them
+ *      i_data_sem serializes them
  *    - discard inode PA
  *      discard process must wait until PA isn't used by another process
  *    - use locality group PA
@@ -140,7 +140,7 @@
  *      discard process must wait until PA isn't used by another process
  *  - use inode PA
  *    - use inode PA
- *      i_truncate_mutex or another mutex should serializes them
+ *      i_data_sem or another mutex should serializes them
  *    - discard inode PA
  *      discard process must wait until PA isn't used by another process
  *    - use locality group PA
@@ -3729,7 +3729,7 @@ static int ext4_mb_discard_group_preallocations(struct super_block *sb,
 	struct list_head list;
 	struct ext4_buddy e4b;
 	int err;
-	int busy;
+	int busy = 0;
 	int free = 0;
 
 	mb_debug("discard preallocation for group %lu\n", group);
@@ -3754,20 +3754,12 @@ static int ext4_mb_discard_group_preallocations(struct super_block *sb,
 	INIT_LIST_HEAD(&list);
 
 repeat:
-	busy = 0;
 	ext4_lock_group(sb, group);
 	list_for_each_entry_safe(pa, tmp,
 				&grp->bb_prealloc_list, pa_group_list) {
 		spin_lock(&pa->pa_lock);
 		if (atomic_read(&pa->pa_count)) {
 			spin_unlock(&pa->pa_lock);
-			/* FIXME!!
-			 * It is quiet natural to have the pa being
-			 * used on other cpus when we are trying free
-			 * space
-			printk(KERN_ERR "uh! busy PA\n");
-			dump_stack();
-			*/
 			busy = 1;
 			continue;
 		}
@@ -3790,7 +3782,9 @@ repeat:
 
 	/* if we still need more blocks and some PAs were used, try again */
 	if (free < needed && busy) {
+		busy = 0;
 		ext4_unlock_group(sb, group);
+		schedule_timeout(HZ);
 		goto repeat;
 	}
 
@@ -3827,7 +3821,7 @@ out:
 /*
  * releases all non-used preallocated blocks for given inode
  *
- * It's important to discard preallocations under truncate_mutex
+ * It's important to discard preallocations under i_data_sem
  * We don't want another block to be served from the prealloc
  * space when we are discarding the inode prealloc space.
  *
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index b54c084..0e4bffe 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -463,7 +463,7 @@ int ext4_ext_migrate(struct inode * inode, struct file * filp,
 	if ((EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
 		return -EINVAL;
 
-	mutex_lock(&EXT4_I(inode)->truncate_mutex);
+	down_write(&EXT4_I(inode)->i_data_sem);
 
 
 	handle = ext4_journal_start(inode,
@@ -621,7 +621,7 @@ err_out:
 
 	ext4_journal_stop(handle);
 
-	mutex_unlock(&EXT4_I(inode)->truncate_mutex);
+	up_write(&EXT4_I(inode)->i_data_sem);
 
 	if (tmp_inode)
 		iput(tmp_inode);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 00e2cbb..ba4bac7 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -596,7 +596,7 @@ static void init_once(struct kmem_cache *cachep, void *foo)
 #ifdef CONFIG_EXT4DEV_FS_XATTR
 	init_rwsem(&ei->xattr_sem);
 #endif
-	mutex_init(&ei->truncate_mutex);
+	init_rwsem(&ei->i_data_sem);
 	inode_init_once(&ei->vfs_inode);
 }
 
@@ -1889,6 +1889,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 	 */
 	set_opt(sbi->s_mount_opt, EXTENTS);
 	set_opt(sbi->s_mount_opt, DELALLOC);
+	set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
 	set_opt(sbi->s_mount_opt, MBALLOC);
 
 	if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index 95b4501..aff5868 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -1158,27 +1158,9 @@ extern void ext4_ext_init(struct super_block *);
 extern void ext4_ext_release(struct super_block *);
 extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
 			  loff_t len);
-static inline int
-ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
-			unsigned long max_blocks, struct buffer_head *bh,
-			int create, int extend_disksize)
-{
-	int retval;
-	mutex_lock(&EXT4_I(inode)->truncate_mutex);
-	if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
-		retval = ext4_ext_get_blocks(handle, inode,
-						(ext4_lblk_t)block, max_blocks,
-						bh, create, extend_disksize);
-	} else {
-		retval = ext4_get_blocks_handle(handle, inode,
-						(ext4_lblk_t)block, max_blocks,
-						bh, create, extend_disksize);
-	}
-	mutex_unlock(&EXT4_I(inode)->truncate_mutex);
-	return retval;
-}
-
-
+extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
+			sector_t block, unsigned long max_blocks,
+			struct buffer_head *bh, int create, int extend_disksize);
 #endif	/* __KERNEL__ */
 
 #endif	/* _LINUX_EXT4_FS_H */
diff --git a/include/linux/ext4_fs_i.h b/include/linux/ext4_fs_i.h
index 7cba9c4..d5508d3 100644
--- a/include/linux/ext4_fs_i.h
+++ b/include/linux/ext4_fs_i.h
@@ -139,16 +139,16 @@ struct ext4_inode_info {
 	__u16 i_extra_isize;
 
 	/*
-	 * truncate_mutex is for serialising ext4_truncate() against
+	 * i_data_sem is for serialising ext4_truncate() against
 	 * ext4_getblock().  In the 2.4 ext2 design, great chunks of inode's
 	 * data tree are chopped off during truncate. We can't do that in
 	 * ext4 because whenever we perform intermediate commits during
 	 * truncate, the inode and all the metadata blocks *must* be in a
 	 * consistent state which allows truncation of the orphans to restart
 	 * during recovery.  Hence we must fix the get_block-vs-truncate race
-	 * by other means, so we have truncate_mutex.
+	 * by other means, so we have i_data_sem.
 	 */
-	struct mutex truncate_mutex;
+	struct rw_semaphore i_data_sem;
 	struct inode vfs_inode;
 
 	unsigned long i_ext_generation;

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* patch queue update
@ 2008-01-10 15:33 Aneesh Kumar K.V
  2008-01-10 21:43 ` Andreas Dilger
  0 siblings, 1 reply; 11+ messages in thread
From: Aneesh Kumar K.V @ 2008-01-10 15:33 UTC (permalink / raw)
  To: Mingming Cao; +Cc: ext4 development

Hi Mingming,

New patches for patch queue can be found at
http://www.radian.org/~kvaneesh/ext4/jan-10-2008-ver2/

The changes are
------------
a) mballoc patch got an explanation about regular allocator.
b) In the mballoc regular allocator we changed the usage of ffs to fls. It
makes sense to use fls because we want to compare it against the
tunable s_mb_order2_reqs. Only requests above this order use
criteria 0 allocation.
c) stripe.patch to use the stripe size set in the super block for block
allocation.

The diff is attached for reference.

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 0d31817..0085fde 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -468,7 +468,6 @@ static void ext4_mb_free_committed_blocks(struct super_block *);
 static void ext4_mb_return_to_preallocation(struct inode *inode,
 					struct ext4_buddy *e4b, sector_t block,
 					int count);
-static void ext4_mb_show_ac(struct ext4_allocation_context *ac);
 static void ext4_mb_put_pa(struct ext4_allocation_context *, struct super_block *,
 						struct ext4_prealloc_space *pa);
 static int ext4_mb_init_per_dev_proc(struct super_block *sb);
@@ -1838,14 +1837,23 @@ static int ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
 		goto out;
 
-	i = ffs(ac->ac_g_ex.fe_len);
+	/*
+	 * ac->ac2_order is set only if the fe_len is a power of 2
+	 * if ac2_order is set we also set criteria to 0 so that we
+	 * try exact allocation using buddy.
+	 */
+	i = fls(ac->ac_g_ex.fe_len);
 	ac->ac_2order = 0;
-	/* FIXME!!
-	 * What happens if i is still greater than s_mb_order2_reqs
+	/*
+	 * We search using buddy data only if the order of the request
+	 * is greater than equal to the sbi_s_mb_order2_reqs
+	 * You can tune it via /proc/fs/ext4/<partition>/order2_req
 	 */
 	if (i >= sbi->s_mb_order2_reqs) {
-		i--;
-		if ((ac->ac_g_ex.fe_len & (~(1 << i))) == 0)
+		/*
+		 * This should tell if fe_len is exactly power of 2
+		 */
+		if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
 			ac->ac_2order = i;
 	}
 
@@ -1865,17 +1873,17 @@ static int ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 		spin_unlock(&sbi->s_md_lock);
 	}
 
+	/* searching for the right group start from the goal value specified */
 	group = ac->ac_g_ex.fe_group;
 
 	/* Let's just scan groups to find more-less suitable blocks */
 	cr = ac->ac_2order ? 0 : 1;
+	/*
+	 * cr == 0 try to get exact allocation,
+	 * cr == 3  try to get anything
+	 */
 repeat:
 	for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
-		/* FIXME!!
-		 * We need to explain what criteria is and also
-		 * need to define the number 0 to 4 for criteria
-		 * What they actually means.
-		 */
 		ac->ac_criteria = cr;
 		for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) {
 			struct ext4_group_info *grp;
@@ -1889,23 +1897,28 @@ repeat:
 			if (grp->bb_free == 0)
 				continue;
 
+			/*
+			 * if the group is already init we check whether it is
+			 * a good group and if not we don't load the buddy
+			 */
 			if (EXT4_MB_GRP_NEED_INIT(EXT4_GROUP_INFO(sb, group))) {
-				/* we need full data about the group
-				 * to make a good selection */
+				/*
+				 * we need full data about the group
+				 * to make a good selection
+				 */
 				err = ext4_mb_load_buddy(sb, group, &e4b);
 				if (err)
 					goto out;
 				ext4_mb_release_desc(&e4b);
 			}
 
-			/* check is group good for our criteries */
+			/*
+			 * If the particular group doesn't satisfy our
+			 * criteria we continue with the next group
+			 */
 			if (!ext4_mb_good_group(ac, group, cr))
 				continue;
 
-			/* FIXME!!
-			 * here also we are loading the buddy. so what difference
-			 * does EXT4_MB_GRP_NEED_INIT actually make
-			 */
 			err = ext4_mb_load_buddy(sb, group, &e4b);
 			if (err)
 				goto out;
@@ -3726,10 +3739,9 @@ repeat:
 		busy = 0;
 		ext4_unlock_group(sb, group);
 		/*
-		 * We see this quiet rare. But if a particular workload is
-		 * effected by this we may need to add a waitqueue
+		 * Yield the CPU here so that we don't get soft lockup
 		 */
-		schedule_timeout(HZ);
+		schedule();
 		goto repeat;
 	}
 
@@ -3808,7 +3820,7 @@ repeat:
 			printk(KERN_ERR "uh-oh! used pa while discarding\n");
 			dump_stack();
 			current->state = TASK_UNINTERRUPTIBLE;
-			schedule();
+			schedule_timeout(HZ);
 			goto repeat;
 
 		}
@@ -3832,8 +3844,12 @@ repeat:
 		 * pa from inode's list may access already
 		 * freed memory, bad-bad-bad */
 
+		/* XXX: if this happens too often, we can
+		 * add a flag to force wait only in case
+		 * of ->clear_inode(), but not in case of
+		 * regular truncate */
 		current->state = TASK_UNINTERRUPTIBLE;
-		schedule();
+		schedule_timeout(HZ);
 		goto repeat;
 	}
 	spin_unlock(&ei->i_prealloc_lock);
@@ -3878,7 +3894,7 @@ static void ext4_mb_return_to_preallocation(struct inode *inode,
 {
 	BUG_ON(!list_empty(&EXT4_I(inode)->i_prealloc_list));
 }
-
+#ifdef MB_DEBUG
 static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
 {
 	struct super_block *sb = ac->ac_sb;
@@ -3928,6 +3944,9 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
 	}
 	printk(KERN_ERR "\n");
 }
+#else
+#define ext4_mb_show_ac(x)
+#endif
 
 /*
  * We use locality group preallocation for small size file. The size of the
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c69f4e5..9d91c60 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1775,6 +1775,21 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb,
 	return (has_super + ext4_group_first_block_no(sb, bg));
 }
 
+static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
+{
+	unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
+	unsigned long stripe_width = le32_to_cpu(sbi->s_es->s_raid_stripe_width);
+
+	if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) {
+		return sbi->s_stripe;
+	} else if (stripe_width <= sbi->s_blocks_per_group) {
+		return stripe_width;
+	} else if (stride <= sbi->s_blocks_per_group) {
+		return stride;
+	}
+
+	return 0;
+}
 
 static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 				__releases(kernel_sem)
@@ -2131,6 +2146,13 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 	sbi->s_rsv_window_head.rsv_alloc_hit = 0;
 	sbi->s_rsv_window_head.rsv_goal_size = 0;
 	ext4_rsv_window_add(sb, &sbi->s_rsv_window_head);
+	/*
+	 * set the stripe size. If we have specified it via mount option, then
+	 * use the mount option value. If the value specified at mount time is
+	 * greater than the blocks per group use the super block value.
+	 * Allocator needs it to be less than blocks per group.
+	 */
+	sbi->s_stripe = ext4_get_stripe_size(sbi);
 
 	/*
 	 * set up enough so that it can read an inode

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: patch queue update
  2008-01-10 15:33 patch " Aneesh Kumar K.V
@ 2008-01-10 21:43 ` Andreas Dilger
  2008-01-11  4:09   ` Aneesh Kumar K.V
  0 siblings, 1 reply; 11+ messages in thread
From: Andreas Dilger @ 2008-01-10 21:43 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: linux-ext4

On Jan 10, 2008  21:03 +0530, Aneesh Kumar K.V wrote:
>  	if (i >= sbi->s_mb_order2_reqs) {
> -		i--;
> -		if ((ac->ac_g_ex.fe_len & (~(1 << i))) == 0)
> +		/*
> +		 * This should tell if fe_len is exactly power of 2
> +		 */
> +		if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
>  			ac->ac_2order = i;

While you changed i to (i - 1) in the "if" you didn't change it when
setting ac_2order...  Is that incorrect?

>  		/*
> +		 * Yield the CPU here so that we don't get soft lockup
>  		 */
> -		schedule_timeout(HZ);
> +		schedule();
>  		goto repeat;
>  	}
>  
> @@ -3808,7 +3820,7 @@ repeat:
>  			printk(KERN_ERR "uh-oh! used pa while discarding\n");
>  			dump_stack();
>  			current->state = TASK_UNINTERRUPTIBLE;
> -			schedule();
> +			schedule_timeout(HZ);
>  			goto repeat;

Is this change to schedule_timeout() intentional?  The earlier code is
removing the use of schedule_timeout.  I could be wrong, as I didn't
follow this discussion closely, but sometimes changes like this happen
accidentally and people don't look at the patch itself...

> +static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
> +{
> +	unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
> +	unsigned long stripe_width = le32_to_cpu(sbi->s_es->s_raid_stripe_width);
> +
> +	if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) {
> +		return sbi->s_stripe;
> +	} else if (stripe_width <= sbi->s_blocks_per_group) {
> +		return stripe_width;
> +	} else if (stride <= sbi->s_blocks_per_group) {
> +		return stride;
> +	}

If you are doing "return XXX" you don't need "else".

> +	/*
> +	 * set the stripe size. If we have specified it via mount option, then
> +	 * use the mount option value. If the value specified at mount time is
> +	 * greater than the blocks per group use the super block value.
> +	 * Allocator needs it be less than blocks per group.
> +	 */
> +	sbi->s_stripe = ext4_get_stripe_size(sbi);

This comment should probably go by ext4_get_stripe_size() definition instead
of here at the caller.

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: patch queue update
  2008-01-10 21:43 ` Andreas Dilger
@ 2008-01-11  4:09   ` Aneesh Kumar K.V
  0 siblings, 0 replies; 11+ messages in thread
From: Aneesh Kumar K.V @ 2008-01-11  4:09 UTC (permalink / raw)
  To: linux-ext4

On Thu, Jan 10, 2008 at 02:43:23PM -0700, Andreas Dilger wrote:
> On Jan 10, 2008  21:03 +0530, Aneesh Kumar K.V wrote:
> >  	if (i >= sbi->s_mb_order2_reqs) {
> > -		i--;
> > -		if ((ac->ac_g_ex.fe_len & (~(1 << i))) == 0)
> > +		/*
> > +		 * This should tell if fe_len is exactly power of 2
> > +		 */
> > +		if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
> >  			ac->ac_2order = i;
> 
> While you changed i to (i - 1) in the "if" you didn't change it when
> setting ac_2order...  Is that incorrect?

Yes that ac_2order should be i - 1;
Will fix it in the next update.

I see that the patch queue update doesn't have most of the changes I
have placed at http://www.radian.org/~kvaneesh/ext4/jan-10-2008-ver2/

> 
> >  		/*
> > +		 * Yield the CPU here so that we don't get soft lockup
> >  		 */
> > -		schedule_timeout(HZ);
> > +		schedule();
> >  		goto repeat;
> >  	}
> >  
> > @@ -3808,7 +3820,7 @@ repeat:
> >  			printk(KERN_ERR "uh-oh! used pa while discarding\n");
> >  			dump_stack();
> >  			current->state = TASK_UNINTERRUPTIBLE;
> > -			schedule();
> > +			schedule_timeout(HZ);
> >  			goto repeat;
> 
> Is this change to schedule_timeout() intentional?  The earlier code is
> removing the use of schedule_timeout.  I could be wrong, as I didn't
> follow this discussion closely, but sometimes changes like this happen
> accidentally and people don't look at the patch itself...


The patch queue had it modified from schedule_timeout to schedule(). I
am moving it back to the original version. If we have set the task state
to TASK_UNINTERRUPTIBLE it should be schedule_timeout. And at these
places we intend to wait uninterrupted for 1 sec. The place where we
wanted to just yield is ext4_mb_discard_group_preallocations.


> > +static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
> > +{
> > +	unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
> > +	unsigned long stripe_width = le32_to_cpu(sbi->s_es->s_raid_stripe_width);
> > +
> > +	if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) {
> > +		return sbi->s_stripe;
> > +	} else if (stripe_width <= sbi->s_blocks_per_group) {
> > +		return stripe_width;
> > +	} else if (stride <= sbi->s_blocks_per_group) {
> > +		return stride;
> > +	}
> 
> If you are doing "return XXX" you don't need "else".
> 
> > +	/*
> > +	 * set the stripe size. If we have specified it via mount option, then
> > +	 * use the mount option value. If the value specified at mount time is
> > +	 * greater than the blocks per group use the super block value.
> > +	 * Allocator needs it be less than blocks per group.
> > +	 */
> > +	sbi->s_stripe = ext4_get_stripe_size(sbi);
> 
> This comment should probably go by ext4_get_stripe_size() definition instead
> of here at the caller.

Will move that to the function definition.

-aneesh

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Patch queue update
@ 2008-01-24 14:50 Aneesh Kumar K.V
  2008-01-24 16:26 ` Andreas Dilger
  2008-01-24 19:50 ` Mingming Cao
  0 siblings, 2 replies; 11+ messages in thread
From: Aneesh Kumar K.V @ 2008-01-24 14:50 UTC (permalink / raw)
  To: Theodore Ts'o, Mingming Cao; +Cc: linux-ext4@vger.kernel.org

I have updated patches based on the review feedback from Andrew.

I have tested this on 
128(64p) ppc64 	sles
4(2p)    ppc64  debian
4(2p)	 x86_64	ubuntu-gutsy

Updated patches are at
http://www.radian.org/~kvaneesh/ext4/jan-24-2008/
http://www.radian.org/~kvaneesh/ext4/jan-24-2008/patches.tar

Diff for reference

diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index 4f329af..ec7d349 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -89,6 +89,8 @@ When mounting an ext4 filesystem, the following option are accepted:
 extents			ext4 will use extents to address file data.  The
 			file system will no longer be mountable by ext3.
 
+noextents		ext4 will not use extents for new files created.
+
 journal_checksum	Enable checksumming of the journal transactions.
 			This will allow the recovery code in e2fsck and the
 			kernel to detect corruption in the kernel.  It is a
@@ -206,6 +208,10 @@ nobh			(a) cache disk block mapping information
 			"nobh" option tries to avoid associating buffer
 			heads (supported only for "writeback" mode).
 
+mballoc		(*)	Use the multiblock allocator for block allocation
+nomballoc		disables the multiblock allocator for block allocation.
+stripe=n		filesystem blocks per stripe for a RAID configuration.
+
 
 Data Mode
 ---------
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index dec9945..4413a2d 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -857,6 +857,45 @@ CPUs.
 The   "procs_blocked" line gives  the  number of  processes currently blocked,
 waiting for I/O to complete.
 
+1.9 Ext4 file system parameters
+------------------------------
+The ext4 file system has one directory per partition under /proc/fs/ext4/
+# ls /proc/fs/ext4/hdc/
+group_prealloc  max_to_scan  mb_groups  mb_history  min_to_scan  order2_req
+stats  stream_req
+
+mb_groups:
+This file gives the details of the multiblock allocator buddy cache of free blocks
+
+mb_history:
+Multiblock allocation history.
+
+stats:
+This file indicates whether the multiblock allocator should start collecting
+statistics. The statistics are shown during unmount
+
+group_prealloc:
+The multiblock allocator normalizes the block allocation request to
+group_prealloc filesystem blocks if we don't have a stripe value set.
+The stripe value can be specified at mount time or during mke2fs.
+
+max_to_scan:
+How long multiblock allocator can look for a best extent (in found extents)
+
+min_to_scan:
+How long multiblock allocator  must look for a best extent
+
+order2_req:
+The multiblock allocator uses 2^N search using buddies only for requests
+greater than or equal to order2_req. The request size is specified in file
+system blocks. A value of 2 means buddies are used only for requests greater
+than or equal to 4 blocks.
+
+stream_req:
+Files smaller than stream_req are served by the stream allocator, whose
+purpose is to pack requests as close to each other as possible to
+produce smooth I/O traffic. A value of 16 indicates that files smaller than
+16 filesystem blocks will use group based preallocation.
 
 ------------------------------------------------------------------------------
 Summary
diff --git a/fs/buffer.c b/fs/buffer.c
index 982cf1a..921eeec 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3232,19 +3232,21 @@ int bh_uptodate_or_lock(struct buffer_head *bh)
 	return 1;
 }
 EXPORT_SYMBOL(bh_uptodate_or_lock);
+
 /**
  * bh_submit_read: Submit a locked buffer for reading
  * @bh: struct buffer_head
  *
- * Returns a negative error
+ * Returns zero on success and -EIO on error.
  */
 int bh_submit_read(struct buffer_head *bh)
 {
-	if (!buffer_locked(bh))
-		lock_buffer(bh);
+	BUG_ON(!buffer_locked(bh));
 
-	if (buffer_uptodate(bh))
+	if (buffer_uptodate(bh)) {
+		unlock_buffer(bh);
 		return 0;
+	}
 
 	get_bh(bh);
 	bh->b_end_io = end_buffer_read_sync;
@@ -3255,6 +3257,7 @@ int bh_submit_read(struct buffer_head *bh)
 	return -EIO;
 }
 EXPORT_SYMBOL(bh_submit_read);
+
 void __init buffer_init(void)
 {
 	int nrpages;
diff --git a/fs/ext4/defrag.c b/fs/ext4/defrag.c
index 4ef3dc0..0d76c74 100644
--- a/fs/ext4/defrag.c
+++ b/fs/ext4/defrag.c
@@ -30,14 +30,6 @@ ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
 	return block;
 }
 
-/* Will go away */
-static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
-{
-	ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
-	ex->ee_start_hi =
-			cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
-}
-
 /*
  * this structure is used to gather extents from the tree via ioctl
  */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index cbda084..c2caf97 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -75,7 +75,7 @@ static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
  * stores a large physical block number into an extent struct,
  * breaking it into parts
  */
-static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
+void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
 {
 	ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
 	ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index e348ceb..bec699a 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -435,7 +435,7 @@ struct ext4_free_metadata {
 
 struct ext4_group_info {
 	unsigned long	bb_state;
-	unsigned long 	bb_tid;
+	unsigned long	bb_tid;
 	struct ext4_free_metadata *bb_md_cur;
 	unsigned short	bb_first_free;
 	unsigned short	bb_free;
@@ -489,7 +489,7 @@ struct ext4_free_extent {
  */
 struct ext4_locality_group {
 	/* for allocator */
-	struct semaphore	lg_sem;		/* to serialize allocates */
+	struct mutex		lg_mutex;	/* to serialize allocates */
 	struct list_head	lg_prealloc_list;/* list of preallocations */
 	spinlock_t		lg_prealloc_lock;
 };
@@ -564,7 +564,10 @@ struct ext4_buddy {
 #define EXT4_MB_BUDDY(e4b)	((e4b)->bd_buddy)
 
 #ifndef EXT4_MB_HISTORY
-#define ext4_mb_store_history(ac)
+static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
+{
+	return;
+}
 #else
 static void ext4_mb_store_history(struct ext4_allocation_context *ac);
 #endif
@@ -642,6 +645,10 @@ static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
 
 static inline int mb_test_bit(int bit, void *addr)
 {
+	/*
+	 * ext4_test_bit on architecture like powerpc
+	 * needs unsigned long aligned address
+	 */
 	mb_correct_addr_and_bit(bit, addr);
 	return ext4_test_bit(bit, addr);
 }
@@ -670,7 +677,7 @@ static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr)
 	ext4_clear_bit_atomic(lock, bit, addr);
 }
 
-static inline void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
+static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
 {
 	char *bb;
 
@@ -753,9 +760,20 @@ static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
 }
 
 #else
-#define mb_free_blocks_double(a, b, c, d)
-#define mb_mark_used_double(a, b, c)
-#define mb_cmp_bitmaps(a, b)
+static inline void mb_free_blocks_double(struct inode *inode,
+				struct ext4_buddy *e4b, int first, int count)
+{
+	return;
+}
+static inline void mb_mark_used_double(struct ext4_buddy *e4b,
+						int first, int count)
+{
+	return;
+}
+static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
+{
+	return;
+}
 #endif
 
 #ifdef AGGRESSIVE_CHECK
@@ -878,26 +896,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
 #define mb_check_buddy(e4b)
 #endif
 
-/* find most significant bit */
-static int fmsb(unsigned short word)
-{
-	int order;
-
-	if (word > 255) {
-		order = 7;
-		word >>= 8;
-	} else {
-		order = -1;
-	}
-
-	do {
-		order++;
-		word >>= 1;
-	} while (word != 0);
-
-	return order;
-}
-
 /* FIXME!! need more doc */
 static void ext4_mb_mark_free_simple(struct super_block *sb,
 				void *buddy, unsigned first, int len,
@@ -918,7 +916,7 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
 		max = ffs(first | border) - 1;
 
 		/* find how many blocks of power 2 we need to mark */
-		min = fmsb(len);
+		min = fls(len) - 1;
 
 		if (max < min)
 			min = max;
@@ -1030,10 +1028,9 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
 	if (groups_per_page > 1) {
 		err = -ENOMEM;
 		i = sizeof(struct buffer_head *) * groups_per_page;
-		bh = kmalloc(i, GFP_NOFS);
+		bh = kzalloc(i, GFP_NOFS);
 		if (bh == NULL)
 			goto out;
-		memset(bh, 0, i);
 	} else
 		bh = &bhs;
 
@@ -1056,15 +1053,9 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
 		if (bh[i] == NULL)
 			goto out;
 
-		if (buffer_uptodate(bh[i]))
+		if (bh_uptodate_or_lock(bh[i]))
 			continue;
 
-		lock_buffer(bh[i]);
-		if (buffer_uptodate(bh[i])) {
-			unlock_buffer(bh[i]);
-			continue;
-		}
-
 		if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
 			ext4_init_block_bitmap(sb, bh[i],
 						first_group + i, desc);
@@ -1303,7 +1294,7 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
 	len = cur + len;
 	while (cur < len) {
 		if ((cur & 31) == 0 && (len - cur) >= 32) {
-			/* fast path: clear whole word at once */
+			/* fast path: set whole word at once */
 			addr = bm + (cur >> 3);
 			*addr = 0xffffffff;
 			cur += 32;
@@ -2681,7 +2672,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 	for (i = 0; i < NR_CPUS; i++) {
 		struct ext4_locality_group *lg;
 		lg = &sbi->s_locality_groups[i];
-		sema_init(&lg->lg_sem, 1);
+		mutex_init(&lg->lg_mutex);
 		INIT_LIST_HEAD(&lg->lg_prealloc_list);
 		spin_lock_init(&lg->lg_prealloc_lock);
 	}
@@ -2693,6 +2684,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 	return 0;
 }
 
+/* needs to be called with the ext4 group lock held (ext4_lock_group) */
 static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
 {
 	struct ext4_prealloc_space *pa;
@@ -2701,7 +2693,7 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
 
 	list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) {
 		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
-		list_del_rcu(&pa->pa_group_list);
+		list_del(&pa->pa_group_list);
 		count++;
 		kfree(pa);
 	}
@@ -2735,7 +2727,9 @@ int ext4_mb_release(struct super_block *sb)
 #ifdef DOUBLE_CHECK
 			kfree(grinfo->bb_bitmap);
 #endif
+			ext4_lock_group(sb,i);
 			ext4_mb_cleanup_pa(grinfo);
+			ext4_unlock_group(sb,i);
 			kfree(grinfo);
 		}
 		num_meta_group_infos = (sbi->s_groups_count +
@@ -3447,6 +3441,7 @@ static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 /*
  * the function goes through all preallocation in this group and marks them
  * used in in-core bitmap. buddy must be generated from this bitmap
+ * Need to be called with ext4 group lock (ext4_lock_group)
  */
 static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
 					ext4_group_t group)
@@ -3468,7 +3463,7 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
 	 * allocation in buddy when concurrent ext4_mb_put_pa()
 	 * is dropping preallocation
 	 */
-	list_for_each_rcu(cur, &grp->bb_prealloc_list) {
+	list_for_each(cur, &grp->bb_prealloc_list) {
 		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
 		spin_lock(&pa->pa_lock);
 		ext4_get_group_no_and_offset(sb, pa->pa_pstart,
@@ -3492,7 +3487,6 @@ static void ext4_mb_pa_callback(struct rcu_head *head)
 	pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
 	kmem_cache_free(ext4_pspace_cachep, pa);
 }
-#define mb_call_rcu(__pa)	call_rcu(&(__pa)->u.pa_rcu, ext4_mb_pa_callback)
 
 /*
  * drops a reference to preallocated space descriptor
@@ -3534,14 +3528,14 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
 	 * against that pair
 	 */
 	ext4_lock_group(sb, grp);
-	list_del_rcu(&pa->pa_group_list);
+	list_del(&pa->pa_group_list);
 	ext4_unlock_group(sb, grp);
 
 	spin_lock(pa->pa_obj_lock);
 	list_del_rcu(&pa->pa_inode_list);
 	spin_unlock(pa->pa_obj_lock);
 
-	mb_call_rcu(pa);
+	call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
 }
 
 /*
@@ -3621,7 +3615,7 @@ static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
 	pa->pa_inode = ac->ac_inode;
 
 	ext4_lock_group(sb, ac->ac_b_ex.fe_group);
-	list_add_rcu(&pa->pa_group_list, &grp->bb_prealloc_list);
+	list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
 	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
 
 	spin_lock(pa->pa_obj_lock);
@@ -3678,7 +3672,7 @@ static int ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
 	pa->pa_inode = NULL;
 
 	ext4_lock_group(sb, ac->ac_b_ex.fe_group);
-	list_add_rcu(&pa->pa_group_list, &grp->bb_prealloc_list);
+	list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
 	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
 
 	spin_lock(pa->pa_obj_lock);
@@ -3859,7 +3853,7 @@ repeat:
 
 		spin_unlock(&pa->pa_lock);
 
-		list_del_rcu(&pa->pa_group_list);
+		list_del(&pa->pa_group_list);
 		list_add(&pa->u.pa_tmp_list, &list);
 	}
 
@@ -3895,7 +3889,7 @@ repeat:
 			ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
 
 		list_del(&pa->u.pa_tmp_list);
-		mb_call_rcu(pa);
+		call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
 	}
 
 out:
@@ -3948,9 +3942,8 @@ repeat:
 			spin_unlock(&pa->pa_lock);
 			spin_unlock(&ei->i_prealloc_lock);
 			printk(KERN_ERR "uh-oh! used pa while discarding\n");
-			dump_stack();
-			current->state = TASK_UNINTERRUPTIBLE;
-			schedule_timeout(HZ);
+			WARN_ON(1);
+			schedule_timeout_uninterruptible(HZ);
 			goto repeat;
 
 		}
@@ -3978,8 +3971,7 @@ repeat:
 		 * add a flag to force wait only in case
 		 * of ->clear_inode(), but not in case of
 		 * regular truncate */
-		current->state = TASK_UNINTERRUPTIBLE;
-		schedule_timeout(HZ);
+		schedule_timeout_uninterruptible(HZ);
 		goto repeat;
 	}
 	spin_unlock(&ei->i_prealloc_lock);
@@ -3999,7 +3991,7 @@ repeat:
 		}
 
 		ext4_lock_group(sb, group);
-		list_del_rcu(&pa->pa_group_list);
+		list_del(&pa->pa_group_list);
 		ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
 		ext4_unlock_group(sb, group);
 
@@ -4007,7 +3999,7 @@ repeat:
 		brelse(bitmap_bh);
 
 		list_del(&pa->u.pa_tmp_list);
-		mb_call_rcu(pa);
+		call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
 	}
 }
 
@@ -4057,7 +4049,8 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
 		struct ext4_prealloc_space *pa;
 		ext4_grpblk_t start;
 		struct list_head *cur;
-		list_for_each_rcu(cur, &grp->bb_prealloc_list) {
+		ext4_lock_group(sb, i);
+		list_for_each(cur, &grp->bb_prealloc_list) {
 			pa = list_entry(cur, struct ext4_prealloc_space,
 					pa_group_list);
 			spin_lock(&pa->pa_lock);
@@ -4067,6 +4060,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
 			printk(KERN_ERR "PA:%lu:%d:%u \n", i,
 							start, pa->pa_len);
 		}
+		ext4_unlock_group(sb, i);
 
 		if (grp->bb_free == 0)
 			continue;
@@ -4076,7 +4070,10 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
 	printk(KERN_ERR "\n");
 }
 #else
-#define ext4_mb_show_ac(x)
+static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
+{
+	return;
+}
 #endif
 
 /*
@@ -4097,8 +4094,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
 
 	size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
 	isize = i_size_read(ac->ac_inode) >> bsbits;
-	if (size < isize)
-		size = isize;
+	size = max(size, isize);
 
 	/* don't use group allocation for large files */
 	if (size >= sbi->s_mb_stream_request)
@@ -4108,6 +4104,11 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
 		return;
 
 	BUG_ON(ac->ac_lg != NULL);
+	/*
+	 * locality group prealloc space are per cpu. The reason for having
+	 * per cpu locality group is to reduce the contention between block
+	 * request from multiple CPUs.
+	 */
 	ac->ac_lg = &sbi->s_locality_groups[get_cpu()];
 	put_cpu();
 
@@ -4115,7 +4116,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
 	ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
 
 	/* serialize all allocations in the group */
-	down(&ac->ac_lg->lg_sem);
+	mutex_lock(&ac->ac_lg->lg_mutex);
 }
 
 static int ext4_mb_initialize_context(struct ext4_allocation_context *ac,
@@ -4209,7 +4210,7 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
 	if (ac->ac_buddy_page)
 		page_cache_release(ac->ac_buddy_page);
 	if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
-		up(&ac->ac_lg->lg_sem);
+		mutex_unlock(&ac->ac_lg->lg_mutex);
 	ext4_mb_collect_stats(ac);
 	return 0;
 }
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 6b40f55..5e9c7e8 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -16,19 +16,15 @@
 #include <linux/ext4_jbd2.h>
 #include <linux/ext4_fs_extents.h>
 
+/*
+ * The contiguous blocks details which can be
+ * represented by a single extent
+ */
 struct list_blocks_struct {
 	ext4_lblk_t first_block, last_block;
 	ext4_fsblk_t first_pblock, last_pblock;
 };
 
-/* will go away */
-static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
-{
-	ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
-	ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1)
-								& 0xffff);
-}
-
 static int finish_range(handle_t *handle, struct inode *inode,
 				struct list_blocks_struct *lb)
 
@@ -61,15 +57,11 @@ static int finish_range(handle_t *handle, struct inode *inode,
 	/*
 	 * Make sure the credit we accumalated is not really high
 	 */
-
 	if (needed && handle->h_buffer_credits >= EXT4_RESERVE_TRANS_BLOCKS) {
-
 		retval = ext4_journal_restart(handle, needed);
 		if (retval)
 			goto err_out;
-
 	}
-
 	if (needed) {
 		retval = ext4_journal_extend(handle, needed);
 		if (retval != 0) {
@@ -81,19 +73,17 @@ static int finish_range(handle_t *handle, struct inode *inode,
 				goto err_out;
 		}
 	}
-
 	retval = ext4_ext_insert_extent(handle, inode, path, &newext);
-
 err_out:
 	lb->first_pblock = 0;
 	return retval;
 }
+
 static int update_extent_range(handle_t *handle, struct inode *inode,
 				ext4_fsblk_t pblock, ext4_lblk_t blk_num,
 				struct list_blocks_struct *lb)
 {
 	int retval;
-
 	/*
 	 * See if we can add on to the existing range (if it exists)
 	 */
@@ -112,7 +102,6 @@ static int update_extent_range(handle_t *handle, struct inode *inode,
 	lb->first_block = lb->last_block = blk_num;
 
 	return retval;
-
 }
 
 static int update_ind_extent_range(handle_t *handle, struct inode *inode,
@@ -136,7 +125,6 @@ static int update_ind_extent_range(handle_t *handle, struct inode *inode,
 		return -EIO;
 
 	i_data = (__le32 *)bh->b_data;
-
 	for (i = 0; i < max_entries; i++, blk_count++) {
 		if (i_data[i]) {
 			retval = update_extent_range(handle, inode,
@@ -153,6 +141,7 @@ static int update_ind_extent_range(handle_t *handle, struct inode *inode,
 	return retval;
 
 }
+
 static int update_dind_extent_range(handle_t *handle, struct inode *inode,
 				    ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
 				    struct list_blocks_struct *lb)
@@ -168,13 +157,11 @@ static int update_dind_extent_range(handle_t *handle, struct inode *inode,
 		*blk_nump += max_entries * max_entries;
 		return 0;
 	}
-
 	bh = sb_bread(inode->i_sb, pblock);
 	if (!bh)
 		return -EIO;
 
 	i_data = (__le32 *)bh->b_data;
-
 	for (i = 0; i < max_entries; i++) {
 		if (i_data[i]) {
 			retval = update_ind_extent_range(handle, inode,
@@ -194,6 +181,7 @@ static int update_dind_extent_range(handle_t *handle, struct inode *inode,
 	return retval;
 
 }
+
 static int update_tind_extent_range(handle_t *handle, struct inode *inode,
 				     ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
 				     struct list_blocks_struct *lb)
@@ -209,13 +197,11 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode,
 		*blk_nump += max_entries * max_entries * max_entries;
 		return 0;
 	}
-
 	bh = sb_bread(inode->i_sb, pblock);
 	if (!bh)
 		return -EIO;
 
 	i_data = (__le32 *)bh->b_data;
-
 	for (i = 0; i < max_entries; i++) {
 		if (i_data[i]) {
 			retval = update_dind_extent_range(handle, inode,
@@ -228,7 +214,6 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode,
 			blk_count += max_entries * max_entries;
 		}
 	}
-
 	/* Update the file block number */
 	*blk_nump = blk_count;
 	brelse(bh);
@@ -236,7 +221,6 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode,
 
 }
 
-
 static int free_dind_blocks(handle_t *handle,
 				struct inode *inode, __le32 i_data)
 {
@@ -258,10 +242,7 @@ static int free_dind_blocks(handle_t *handle,
 	}
 	brelse(bh);
 	ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
-
 	return 0;
-
-
 }
 
 static int free_tind_blocks(handle_t *handle,
@@ -277,7 +258,6 @@ static int free_tind_blocks(handle_t *handle,
 		return -EIO;
 
 	tmp_idata = (__le32 *)bh->b_data;
-
 	for (i = 0; i < max_entries; i++) {
 		if (tmp_idata[i]) {
 			retval = free_dind_blocks(handle,
@@ -290,10 +270,7 @@ static int free_tind_blocks(handle_t *handle,
 	}
 	brelse(bh);
 	ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
-
 	return 0;
-
-
 }
 
 static int free_ind_block(handle_t *handle, struct inode *inode)
@@ -302,10 +279,8 @@ static int free_ind_block(handle_t *handle, struct inode *inode)
 	struct ext4_inode_info *ei = EXT4_I(inode);
 
 	if (ei->i_data[EXT4_IND_BLOCK]) {
-
 		ext4_free_blocks(handle, inode,
 				le32_to_cpu(ei->i_data[EXT4_IND_BLOCK]), 1, 1);
-
 	}
 
 	if (ei->i_data[EXT4_DIND_BLOCK]) {
@@ -321,17 +296,15 @@ static int free_ind_block(handle_t *handle, struct inode *inode)
 		if (retval)
 			return retval;
 	}
-
-
 	return 0;
 }
+
 static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
 				struct inode *tmp_inode, int retval)
 {
 	struct ext4_inode_info *ei = EXT4_I(inode);
 	struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode);
 
-
 	retval = free_ind_block(handle, inode);
 	if (retval)
 		goto err_out;
@@ -368,9 +341,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
 	spin_unlock(&inode->i_lock);
 
 	ext4_mark_inode_dirty(handle, inode);
-
 err_out:
-
 	return retval;
 }
 
@@ -392,7 +363,6 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
 	struct buffer_head *bh;
 	struct ext4_extent_header *eh;
 
-
 	block = idx_pblock(ix);
 	bh = sb_bread(inode->i_sb, block);
 	if (!bh)
@@ -400,24 +370,19 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
 
 	eh = (struct ext4_extent_header *)bh->b_data;
 	if (eh->eh_depth == 0) {
-
 		brelse(bh);
 		ext4_free_blocks(handle, inode, block, 1, 1);
-
 	} else {
-
 		ix = EXT_FIRST_INDEX(eh);
 		for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
 			retval = free_ext_idx(handle, inode, ix);
 			if (retval)
 				return retval;
 		}
-
 	}
-
 	return retval;
-
 }
+
 /*
  * Free the extent meta data blocks only
  */
@@ -439,10 +404,10 @@ static int free_ext_block(handle_t *handle, struct inode *inode)
 		if (retval)
 			return retval;
 	}
-
 	return retval;
 
 }
+
 int ext4_ext_migrate(struct inode *inode, struct file *filp,
 				unsigned int cmd, unsigned long arg)
 {
@@ -455,7 +420,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
 	struct list_blocks_struct lb;
 	unsigned long max_entries;
 
-
 	if (!test_opt(inode->i_sb, EXTENTS)) {
 		/*
 		 * if mounted with noextents
@@ -468,8 +432,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
 		return -EINVAL;
 
 	down_write(&EXT4_I(inode)->i_data_sem);
-
-
 	handle = ext4_journal_start(inode,
 					EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
 					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
@@ -479,18 +441,15 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
 		retval = PTR_ERR(handle);
 		goto err_out;
 	}
-
 	tmp_inode = ext4_new_inode(handle,
 				inode->i_sb->s_root->d_inode,
 				S_IFREG);
-
 	if (IS_ERR(tmp_inode)) {
 		retval = -ENOMEM;
 		ext4_journal_stop(handle);
 		tmp_inode = NULL;
 		goto err_out;
 	}
-
 	i_size_write(tmp_inode, i_size_read(inode));
 	/*
 	 * We don't want the inode to be reclaimed
@@ -523,7 +482,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
 	 */
 	handle = ext4_journal_start(inode, 1);
 	for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) {
-
 		if (i_data[i]) {
 			retval = update_extent_range(handle, tmp_inode,
 						le32_to_cpu(i_data[i]),
@@ -532,7 +490,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
 				goto err_out;
 		}
 	}
-
 	if (i_data[EXT4_IND_BLOCK]) {
 		retval = update_ind_extent_range(handle, tmp_inode,
 					le32_to_cpu(i_data[EXT4_IND_BLOCK]),
@@ -542,7 +499,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
 	} else {
 		blk_count +=  max_entries;
 	}
-
 	if (i_data[EXT4_DIND_BLOCK]) {
 		retval = update_dind_extent_range(handle, tmp_inode,
 					le32_to_cpu(i_data[EXT4_DIND_BLOCK]),
@@ -552,8 +508,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
 	} else {
 		blk_count += max_entries * max_entries;
 	}
-
-
 	if (i_data[EXT4_TIND_BLOCK]) {
 		retval = update_tind_extent_range(handle, tmp_inode,
 					le32_to_cpu(i_data[EXT4_TIND_BLOCK]),
@@ -561,12 +515,10 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
 			if (retval)
 				goto err_out;
 	}
-
 	/*
 	 * Build the last extent
 	 */
 	retval = finish_range(handle, tmp_inode, &lb);
-
 err_out:
 	/*
 	 * We are either freeing extent information or indirect
@@ -577,14 +529,12 @@ err_out:
 	 *
 	 * FIXME!! we may be touching bitmaps in different block groups.
 	 */
-
 	if (ext4_journal_extend(handle,
 			4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)) != 0) {
 
 		ext4_journal_restart(handle,
 				4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb));
 	}
-
 	if (retval) {
 		/*
 		 * Failure case delete the extent information with the
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index cf2f612..416d919 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1841,13 +1841,14 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
 	unsigned long stripe_width =
 			le32_to_cpu(sbi->s_es->s_raid_stripe_width);
 
-	if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) {
+	if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
 		return sbi->s_stripe;
-	} else if (stripe_width <= sbi->s_blocks_per_group) {
+
+	if (stripe_width <= sbi->s_blocks_per_group)
 		return stripe_width;
-	} else if (stride <= sbi->s_blocks_per_group) {
+
+	if (stride <= sbi->s_blocks_per_group)
 		return stride;
-	}
 
 	return 0;
 }
diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h
index be4ada4..8bece0e 100644
--- a/include/linux/ext4_fs_extents.h
+++ b/include/linux/ext4_fs_extents.h
@@ -225,6 +225,7 @@ static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
 		(le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN));
 }
 
+extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
 extern int ext4_extent_tree_init(handle_t *, struct inode *);
 extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
 extern int ext4_ext_try_to_merge(struct inode *inode,

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: Patch queue update
  2008-01-24 14:50 Patch queue update Aneesh Kumar K.V
@ 2008-01-24 16:26 ` Andreas Dilger
  2008-01-24 16:32   ` Eric Sandeen
  2008-01-24 19:50 ` Mingming Cao
  1 sibling, 1 reply; 11+ messages in thread
From: Andreas Dilger @ 2008-01-24 16:26 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: Theodore Ts'o, Mingming Cao, linux-ext4@vger.kernel.org

On Jan 24, 2008  20:20 +0530, Aneesh Kumar K.V wrote:
> @@ -89,6 +89,8 @@ When mounting an ext4 filesystem, the following option are accepted:
>  extents			ext4 will use extents to address file data.  The
>  			file system will no longer be mountable by ext3.
>  
> +noextents		ext4 will not use extents for new files created.
> +

s/new files created/newly created files/

>  journal_checksum	Enable checksumming of the journal transactions.
>  			This will allow the recovery code in e2fsck and the
>  			kernel to detect corruption in the kernel.  It is a
> @@ -206,6 +208,10 @@ nobh			(a) cache disk block mapping information
>  			"nobh" option tries to avoid associating buffer
>  			heads (supported only for "writeback" mode).
>  
> +mballoc		(*)	Use the mutliblock allocator for block allocation
> +nomballoc		disabled multiblock allocator for block allocation.
> +stripe=n		filesystem blocks per stripe for a RAID configuration.

Please provide a more verbose description of what a "stripe" is, since the
RAID terminology is sadly vague.  Something like "number of filesystem blocks 
that mballoc will try to use for allocation size and alignment.  For RAID5/6
systems this should be the number of data disks * number of filesystem blocks
per data disk."


> @@ -3948,9 +3942,8 @@ repeat:
>  			spin_unlock(&pa->pa_lock);
>  			spin_unlock(&ei->i_prealloc_lock);
>  			printk(KERN_ERR "uh-oh! used pa while discarding\n");
> -			dump_stack();
> -			current->state = TASK_UNINTERRUPTIBLE;
> -			schedule_timeout(HZ);
> +			WARN_ON(1);

This printk and dump stack can just go away, we have removed it from our
mballoc patch as well because it was only needed for determining how often
this condition is hit and is otherwise useless.

> @@ -577,14 +529,12 @@ err_out:
>  	 *
>  	 * FIXME!! we may be touching bitmaps in different block groups.
>  	 */
> -
>  	if (ext4_journal_extend(handle,
>  			4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)) != 0) {
>  
>  		ext4_journal_restart(handle,
>  				4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb));
>  	}
> -

There don't actually need to be braces here either.

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: Patch queue update
  2008-01-24 16:26 ` Andreas Dilger
@ 2008-01-24 16:32   ` Eric Sandeen
  0 siblings, 0 replies; 11+ messages in thread
From: Eric Sandeen @ 2008-01-24 16:32 UTC (permalink / raw)
  To: Aneesh Kumar K.V, Theodore Ts'o, Mingming Cao,
	linux-ext4@vger.kernel.org

Andreas Dilger wrote:
> On Jan 24, 2008  20:20 +0530, Aneesh Kumar K.V wrote:
>> @@ -89,6 +89,8 @@ When mounting an ext4 filesystem, the following option are accepted:
>>  extents			ext4 will use extents to address file data.  The
>>  			file system will no longer be mountable by ext3.
>>  
>> +noextents		ext4 will not use extents for new files created.
>> +
> 
> s/new files created/newly created files/

Would a blurb about keeping ext3 disk-format compatibility be worthwhile
here?

>>  journal_checksum	Enable checksumming of the journal transactions.
>>  			This will allow the recovery code in e2fsck and the
>>  			kernel to detect corruption in the kernel.  It is a
>> @@ -206,6 +208,10 @@ nobh			(a) cache disk block mapping information
>>  			"nobh" option tries to avoid associating buffer
>>  			heads (supported only for "writeback" mode).
>>  
>> +mballoc		(*)	Use the mutliblock allocator for block allocation

speeeeling on "mutliblock" too :)

-Eric

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: Patch queue update
  2008-01-24 14:50 Patch queue update Aneesh Kumar K.V
  2008-01-24 16:26 ` Andreas Dilger
@ 2008-01-24 19:50 ` Mingming Cao
  1 sibling, 0 replies; 11+ messages in thread
From: Mingming Cao @ 2008-01-24 19:50 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: Theodore Ts'o, linux-ext4@vger.kernel.org

On Thu, 2008-01-24 at 20:20 +0530, Aneesh Kumar K.V wrote:
> I have updated patches based on the review feedback from Andrew.
> 
> I have tested this on 
> 128(64p) ppc64 	sles
> 4(2p)    ppc64  debian
> 4(2p)	 x86_64	ubuntu-gutsy
> 
> Updated patches are at
> http://www.radian.org/~kvaneesh/ext4/jan-24-2008/
> http://www.radian.org/~kvaneesh/ext4/jan-24-2008/patches.tar
> 

Thanks, I have updated the ext4 patch queue with your changes, and fixed
the checkpatch warnings in mballoc-core.patch.

Mingming

> Diff for reference
> 
> diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
> index 4f329af..ec7d349 100644
> --- a/Documentation/filesystems/ext4.txt
> +++ b/Documentation/filesystems/ext4.txt
> @@ -89,6 +89,8 @@ When mounting an ext4 filesystem, the following option are accepted:
>  extents			ext4 will use extents to address file data.  The
>  			file system will no longer be mountable by ext3.
> 
> +noextents		ext4 will not use extents for new files created.
> +
>  journal_checksum	Enable checksumming of the journal transactions.
>  			This will allow the recovery code in e2fsck and the
>  			kernel to detect corruption in the kernel.  It is a
> @@ -206,6 +208,10 @@ nobh			(a) cache disk block mapping information
>  			"nobh" option tries to avoid associating buffer
>  			heads (supported only for "writeback" mode).
> 
> +mballoc		(*)	Use the mutliblock allocator for block allocation
> +nomballoc		disabled multiblock allocator for block allocation.
> +stripe=n		filesystem blocks per stripe for a RAID configuration.
> +
> 
>  Data Mode
>  ---------
> diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
> index dec9945..4413a2d 100644
> --- a/Documentation/filesystems/proc.txt
> +++ b/Documentation/filesystems/proc.txt
> @@ -857,6 +857,45 @@ CPUs.
>  The   "procs_blocked" line gives  the  number of  processes currently blocked,
>  waiting for I/O to complete.
> 
> +1.9 Ext4 file system parameters
> +------------------------------
> +Ext4 file system have one directory per partition under /proc/fs/ext4/
> +# ls /proc/fs/ext4/hdc/
> +group_prealloc  max_to_scan  mb_groups  mb_history  min_to_scan  order2_req
> +stats  stream_req
> +
> +mb_groups:
> +This file gives the details of mutiblock allocator buddy cache of free blocks
> +
> +mb_history:
> +Multiblock allocation history.
> +
> +stats:
> +This file indicate whether the multiblock allocator should start collecting
> +statistics. The statistics are shown during unmount
> +
> +group_prealloc:
> +The multiblock allocator normalize the block allocation request to
> +group_prealloc filesystem blocks if we don't have strip value set.
> +The stripe value can be specified at mount time or during mke2fs.
> +
> +max_to_scan:
> +How long multiblock allocator can look for a best extent (in found extents)
> +
> +min_to_scan:
> +How long multiblock allocator  must look for a best extent
> +
> +order2_req:
> +Multiblock allocator use  2^N search using buddies only for requests greater
> +than or equal to order2_req. The request size is specfied in file system
> +blocks. A value of 2 indicate only if the requests are greater than or equal
> +to 4 blocks.
> +
> +stream_req:
> +Files smaller than stream_req are served by the stream allocator, whose
> +purpose is to pack requests as close each to other as possible to
> +produce smooth I/O traffic. Avalue of 16 indicate that file smaller than 16
> +filesystem block size will use group based preallocation.
> 
>  ------------------------------------------------------------------------------
>  Summary
> diff --git a/fs/buffer.c b/fs/buffer.c
> index 982cf1a..921eeec 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -3232,19 +3232,21 @@ int bh_uptodate_or_lock(struct buffer_head *bh)
>  	return 1;
>  }
>  EXPORT_SYMBOL(bh_uptodate_or_lock);
> +
>  /**
>   * bh_submit_read: Submit a locked buffer for reading
>   * @bh: struct buffer_head
>   *
> - * Returns a negative error
> + * Returns zero on success and -EIO on error.
>   */
>  int bh_submit_read(struct buffer_head *bh)
>  {
> -	if (!buffer_locked(bh))
> -		lock_buffer(bh);
> +	BUG_ON(!buffer_locked(bh));
> 
> -	if (buffer_uptodate(bh))
> +	if (buffer_uptodate(bh)) {
> +		unlock_buffer(bh);
>  		return 0;
> +	}
> 
>  	get_bh(bh);
>  	bh->b_end_io = end_buffer_read_sync;
> @@ -3255,6 +3257,7 @@ int bh_submit_read(struct buffer_head *bh)
>  	return -EIO;
>  }
>  EXPORT_SYMBOL(bh_submit_read);
> +
>  void __init buffer_init(void)
>  {
>  	int nrpages;
> diff --git a/fs/ext4/defrag.c b/fs/ext4/defrag.c
> index 4ef3dc0..0d76c74 100644
> --- a/fs/ext4/defrag.c
> +++ b/fs/ext4/defrag.c
> @@ -30,14 +30,6 @@ ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
>  	return block;
>  }
> 
> -/* Will go away */
> -static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
> -{
> -	ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
> -	ex->ee_start_hi =
> -			cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
> -}
> -
>  /*
>   * this structure is used to gather extents from the tree via ioctl
>   */
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index cbda084..c2caf97 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -75,7 +75,7 @@ static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
>   * stores a large physical block number into an extent struct,
>   * breaking it into parts
>   */
> -static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
> +void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
>  {
>  	ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
>  	ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index e348ceb..bec699a 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -435,7 +435,7 @@ struct ext4_free_metadata {
> 
>  struct ext4_group_info {
>  	unsigned long	bb_state;
> -	unsigned long 	bb_tid;
> +	unsigned long	bb_tid;
>  	struct ext4_free_metadata *bb_md_cur;
>  	unsigned short	bb_first_free;
>  	unsigned short	bb_free;
> @@ -489,7 +489,7 @@ struct ext4_free_extent {
>   */
>  struct ext4_locality_group {
>  	/* for allocator */
> -	struct semaphore	lg_sem;		/* to serialize allocates */
> +	struct mutex		lg_mutex;	/* to serialize allocates */
>  	struct list_head	lg_prealloc_list;/* list of preallocations */
>  	spinlock_t		lg_prealloc_lock;
>  };
> @@ -564,7 +564,10 @@ struct ext4_buddy {
>  #define EXT4_MB_BUDDY(e4b)	((e4b)->bd_buddy)
> 
>  #ifndef EXT4_MB_HISTORY
> -#define ext4_mb_store_history(ac)
> +static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
> +{
> +	return;
> +}
>  #else
>  static void ext4_mb_store_history(struct ext4_allocation_context *ac);
>  #endif
> @@ -642,6 +645,10 @@ static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
> 
>  static inline int mb_test_bit(int bit, void *addr)
>  {
> +	/*
> +	 * ext4_test_bit on architecture like powerpc
> +	 * needs unsigned long aligned address
> +	 */
>  	mb_correct_addr_and_bit(bit, addr);
>  	return ext4_test_bit(bit, addr);
>  }
> @@ -670,7 +677,7 @@ static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr)
>  	ext4_clear_bit_atomic(lock, bit, addr);
>  }
> 
> -static inline void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
> +static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
>  {
>  	char *bb;
> 
> @@ -753,9 +760,20 @@ static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
>  }
> 
>  #else
> -#define mb_free_blocks_double(a, b, c, d)
> -#define mb_mark_used_double(a, b, c)
> -#define mb_cmp_bitmaps(a, b)
> +static inline void mb_free_blocks_double(struct inode *inode,
> +				struct ext4_buddy *e4b, int first, int count)
> +{
> +	return;
> +}
> +static inline void mb_mark_used_double(struct ext4_buddy *e4b,
> +						int first, int count)
> +{
> +	return;
> +}
> +static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
> +{
> +	return;
> +}
>  #endif
> 
>  #ifdef AGGRESSIVE_CHECK
> @@ -878,26 +896,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
>  #define mb_check_buddy(e4b)
>  #endif
> 
> -/* find most significant bit */
> -static int fmsb(unsigned short word)
> -{
> -	int order;
> -
> -	if (word > 255) {
> -		order = 7;
> -		word >>= 8;
> -	} else {
> -		order = -1;
> -	}
> -
> -	do {
> -		order++;
> -		word >>= 1;
> -	} while (word != 0);
> -
> -	return order;
> -}
> -
>  /* FIXME!! need more doc */
>  static void ext4_mb_mark_free_simple(struct super_block *sb,
>  				void *buddy, unsigned first, int len,
> @@ -918,7 +916,7 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
>  		max = ffs(first | border) - 1;
> 
>  		/* find how many blocks of power 2 we need to mark */
> -		min = fmsb(len);
> +		min = fls(len) - 1;
> 
>  		if (max < min)
>  			min = max;
> @@ -1030,10 +1028,9 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
>  	if (groups_per_page > 1) {
>  		err = -ENOMEM;
>  		i = sizeof(struct buffer_head *) * groups_per_page;
> -		bh = kmalloc(i, GFP_NOFS);
> +		bh = kzalloc(i, GFP_NOFS);
>  		if (bh == NULL)
>  			goto out;
> -		memset(bh, 0, i);
>  	} else
>  		bh = &bhs;
> 
> @@ -1056,15 +1053,9 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
>  		if (bh[i] == NULL)
>  			goto out;
> 
> -		if (buffer_uptodate(bh[i]))
> +		if (bh_uptodate_or_lock(bh[i]))
>  			continue;
> 
> -		lock_buffer(bh[i]);
> -		if (buffer_uptodate(bh[i])) {
> -			unlock_buffer(bh[i]);
> -			continue;
> -		}
> -
>  		if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
>  			ext4_init_block_bitmap(sb, bh[i],
>  						first_group + i, desc);
> @@ -1303,7 +1294,7 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
>  	len = cur + len;
>  	while (cur < len) {
>  		if ((cur & 31) == 0 && (len - cur) >= 32) {
> -			/* fast path: clear whole word at once */
> +			/* fast path: set whole word at once */
>  			addr = bm + (cur >> 3);
>  			*addr = 0xffffffff;
>  			cur += 32;
> @@ -2681,7 +2672,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
>  	for (i = 0; i < NR_CPUS; i++) {
>  		struct ext4_locality_group *lg;
>  		lg = &sbi->s_locality_groups[i];
> -		sema_init(&lg->lg_sem, 1);
> +		mutex_init(&lg->lg_mutex);
>  		INIT_LIST_HEAD(&lg->lg_prealloc_list);
>  		spin_lock_init(&lg->lg_prealloc_lock);
>  	}
> @@ -2693,6 +2684,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
>  	return 0;
>  }
> 
> +/* need to called with ext4 group lock (ext4_lock_group) */
>  static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
>  {
>  	struct ext4_prealloc_space *pa;
> @@ -2701,7 +2693,7 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
> 
>  	list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) {
>  		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
> -		list_del_rcu(&pa->pa_group_list);
> +		list_del(&pa->pa_group_list);
>  		count++;
>  		kfree(pa);
>  	}
> @@ -2735,7 +2727,9 @@ int ext4_mb_release(struct super_block *sb)
>  #ifdef DOUBLE_CHECK
>  			kfree(grinfo->bb_bitmap);
>  #endif
> +			ext4_lock_group(sb,i);
>  			ext4_mb_cleanup_pa(grinfo);
> +			ext4_lock_group(sb,i);
>  			kfree(grinfo);
>  		}
>  		num_meta_group_infos = (sbi->s_groups_count +
> @@ -3447,6 +3441,7 @@ static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
>  /*
>   * the function goes through all preallocation in this group and marks them
>   * used in in-core bitmap. buddy must be generated from this bitmap
> + * Need to be called with ext4 group lock (ext4_lock_group)
>   */
>  static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
>  					ext4_group_t group)
> @@ -3468,7 +3463,7 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
>  	 * allocation in buddy when concurrent ext4_mb_put_pa()
>  	 * is dropping preallocation
>  	 */
> -	list_for_each_rcu(cur, &grp->bb_prealloc_list) {
> +	list_for_each(cur, &grp->bb_prealloc_list) {
>  		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
>  		spin_lock(&pa->pa_lock);
>  		ext4_get_group_no_and_offset(sb, pa->pa_pstart,
> @@ -3492,7 +3487,6 @@ static void ext4_mb_pa_callback(struct rcu_head *head)
>  	pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
>  	kmem_cache_free(ext4_pspace_cachep, pa);
>  }
> -#define mb_call_rcu(__pa)	call_rcu(&(__pa)->u.pa_rcu, ext4_mb_pa_callback)
> 
>  /*
>   * drops a reference to preallocated space descriptor
> @@ -3534,14 +3528,14 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
>  	 * against that pair
>  	 */
>  	ext4_lock_group(sb, grp);
> -	list_del_rcu(&pa->pa_group_list);
> +	list_del(&pa->pa_group_list);
>  	ext4_unlock_group(sb, grp);
> 
>  	spin_lock(pa->pa_obj_lock);
>  	list_del_rcu(&pa->pa_inode_list);
>  	spin_unlock(pa->pa_obj_lock);
> 
> -	mb_call_rcu(pa);
> +	call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
>  }
> 
>  /*
> @@ -3621,7 +3615,7 @@ static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
>  	pa->pa_inode = ac->ac_inode;
> 
>  	ext4_lock_group(sb, ac->ac_b_ex.fe_group);
> -	list_add_rcu(&pa->pa_group_list, &grp->bb_prealloc_list);
> +	list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
>  	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
> 
>  	spin_lock(pa->pa_obj_lock);
> @@ -3678,7 +3672,7 @@ static int ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
>  	pa->pa_inode = NULL;
> 
>  	ext4_lock_group(sb, ac->ac_b_ex.fe_group);
> -	list_add_rcu(&pa->pa_group_list, &grp->bb_prealloc_list);
> +	list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
>  	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
> 
>  	spin_lock(pa->pa_obj_lock);
> @@ -3859,7 +3853,7 @@ repeat:
> 
>  		spin_unlock(&pa->pa_lock);
> 
> -		list_del_rcu(&pa->pa_group_list);
> +		list_del(&pa->pa_group_list);
>  		list_add(&pa->u.pa_tmp_list, &list);
>  	}
> 
> @@ -3895,7 +3889,7 @@ repeat:
>  			ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
> 
>  		list_del(&pa->u.pa_tmp_list);
> -		mb_call_rcu(pa);
> +		call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
>  	}
> 
>  out:
> @@ -3948,9 +3942,8 @@ repeat:
>  			spin_unlock(&pa->pa_lock);
>  			spin_unlock(&ei->i_prealloc_lock);
>  			printk(KERN_ERR "uh-oh! used pa while discarding\n");
> -			dump_stack();
> -			current->state = TASK_UNINTERRUPTIBLE;
> -			schedule_timeout(HZ);
> +			WARN_ON(1);
> +			schedule_timeout_uninterruptible(HZ);
>  			goto repeat;
> 
>  		}
> @@ -3978,8 +3971,7 @@ repeat:
>  		 * add a flag to force wait only in case
>  		 * of ->clear_inode(), but not in case of
>  		 * regular truncate */
> -		current->state = TASK_UNINTERRUPTIBLE;
> -		schedule_timeout(HZ);
> +		schedule_timeout_uninterruptible(HZ);
>  		goto repeat;
>  	}
>  	spin_unlock(&ei->i_prealloc_lock);
> @@ -3999,7 +3991,7 @@ repeat:
>  		}
> 
>  		ext4_lock_group(sb, group);
> -		list_del_rcu(&pa->pa_group_list);
> +		list_del(&pa->pa_group_list);
>  		ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
>  		ext4_unlock_group(sb, group);
> 
> @@ -4007,7 +3999,7 @@ repeat:
>  		brelse(bitmap_bh);
> 
>  		list_del(&pa->u.pa_tmp_list);
> -		mb_call_rcu(pa);
> +		call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
>  	}
>  }
> 
> @@ -4057,7 +4049,8 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
>  		struct ext4_prealloc_space *pa;
>  		ext4_grpblk_t start;
>  		struct list_head *cur;
> -		list_for_each_rcu(cur, &grp->bb_prealloc_list) {
> +		ext4_lock_group(sb, i);
> +		list_for_each(cur, &grp->bb_prealloc_list) {
>  			pa = list_entry(cur, struct ext4_prealloc_space,
>  					pa_group_list);
>  			spin_lock(&pa->pa_lock);
> @@ -4067,6 +4060,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
>  			printk(KERN_ERR "PA:%lu:%d:%u \n", i,
>  							start, pa->pa_len);
>  		}
> +		ext4_lock_group(sb, i);
> 
>  		if (grp->bb_free == 0)
>  			continue;
> @@ -4076,7 +4070,10 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
>  	printk(KERN_ERR "\n");
>  }
>  #else
> -#define ext4_mb_show_ac(x)
> +static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
> +{
> +	return;
> +}
>  #endif
> 
>  /*
> @@ -4097,8 +4094,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
> 
>  	size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
>  	isize = i_size_read(ac->ac_inode) >> bsbits;
> -	if (size < isize)
> -		size = isize;
> +	size = max(size, isize);
> 
>  	/* don't use group allocation for large files */
>  	if (size >= sbi->s_mb_stream_request)
> @@ -4108,6 +4104,11 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
>  		return;
> 
>  	BUG_ON(ac->ac_lg != NULL);
> +	/*
> +	 * locality group prealloc space are per cpu. The reason for having
> +	 * per cpu locality group is to reduce the contention between block
> +	 * request from multiple CPUs.
> +	 */
>  	ac->ac_lg = &sbi->s_locality_groups[get_cpu()];
>  	put_cpu();
> 
> @@ -4115,7 +4116,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
>  	ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
> 
>  	/* serialize all allocations in the group */
> -	down(&ac->ac_lg->lg_sem);
> +	mutex_lock(&ac->ac_lg->lg_mutex);
>  }
> 
>  static int ext4_mb_initialize_context(struct ext4_allocation_context *ac,
> @@ -4209,7 +4210,7 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
>  	if (ac->ac_buddy_page)
>  		page_cache_release(ac->ac_buddy_page);
>  	if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
> -		up(&ac->ac_lg->lg_sem);
> +		mutex_unlock(&ac->ac_lg->lg_mutex);
>  	ext4_mb_collect_stats(ac);
>  	return 0;
>  }
> diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
> index 6b40f55..5e9c7e8 100644
> --- a/fs/ext4/migrate.c
> +++ b/fs/ext4/migrate.c
> @@ -16,19 +16,15 @@
>  #include <linux/ext4_jbd2.h>
>  #include <linux/ext4_fs_extents.h>
> 
> +/*
> + * The contiguous blocks details which can be
> + * represented by a single extent
> + */
>  struct list_blocks_struct {
>  	ext4_lblk_t first_block, last_block;
>  	ext4_fsblk_t first_pblock, last_pblock;
>  };
> 
> -/* will go away */
> -static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
> -{
> -	ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
> -	ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1)
> -								& 0xffff);
> -}
> -
>  static int finish_range(handle_t *handle, struct inode *inode,
>  				struct list_blocks_struct *lb)
> 
> @@ -61,15 +57,11 @@ static int finish_range(handle_t *handle, struct inode *inode,
>  	/*
>  	 * Make sure the credit we accumalated is not really high
>  	 */
> -
>  	if (needed && handle->h_buffer_credits >= EXT4_RESERVE_TRANS_BLOCKS) {
> -
>  		retval = ext4_journal_restart(handle, needed);
>  		if (retval)
>  			goto err_out;
> -
>  	}
> -
>  	if (needed) {
>  		retval = ext4_journal_extend(handle, needed);
>  		if (retval != 0) {
> @@ -81,19 +73,17 @@ static int finish_range(handle_t *handle, struct inode *inode,
>  				goto err_out;
>  		}
>  	}
> -
>  	retval = ext4_ext_insert_extent(handle, inode, path, &newext);
> -
>  err_out:
>  	lb->first_pblock = 0;
>  	return retval;
>  }
> +
>  static int update_extent_range(handle_t *handle, struct inode *inode,
>  				ext4_fsblk_t pblock, ext4_lblk_t blk_num,
>  				struct list_blocks_struct *lb)
>  {
>  	int retval;
> -
>  	/*
>  	 * See if we can add on to the existing range (if it exists)
>  	 */
> @@ -112,7 +102,6 @@ static int update_extent_range(handle_t *handle, struct inode *inode,
>  	lb->first_block = lb->last_block = blk_num;
> 
>  	return retval;
> -
>  }
> 
>  static int update_ind_extent_range(handle_t *handle, struct inode *inode,
> @@ -136,7 +125,6 @@ static int update_ind_extent_range(handle_t *handle, struct inode *inode,
>  		return -EIO;
> 
>  	i_data = (__le32 *)bh->b_data;
> -
>  	for (i = 0; i < max_entries; i++, blk_count++) {
>  		if (i_data[i]) {
>  			retval = update_extent_range(handle, inode,
> @@ -153,6 +141,7 @@ static int update_ind_extent_range(handle_t *handle, struct inode *inode,
>  	return retval;
> 
>  }
> +
>  static int update_dind_extent_range(handle_t *handle, struct inode *inode,
>  				    ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
>  				    struct list_blocks_struct *lb)
> @@ -168,13 +157,11 @@ static int update_dind_extent_range(handle_t *handle, struct inode *inode,
>  		*blk_nump += max_entries * max_entries;
>  		return 0;
>  	}
> -
>  	bh = sb_bread(inode->i_sb, pblock);
>  	if (!bh)
>  		return -EIO;
> 
>  	i_data = (__le32 *)bh->b_data;
> -
>  	for (i = 0; i < max_entries; i++) {
>  		if (i_data[i]) {
>  			retval = update_ind_extent_range(handle, inode,
> @@ -194,6 +181,7 @@ static int update_dind_extent_range(handle_t *handle, struct inode *inode,
>  	return retval;
> 
>  }
> +
>  static int update_tind_extent_range(handle_t *handle, struct inode *inode,
>  				     ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
>  				     struct list_blocks_struct *lb)
> @@ -209,13 +197,11 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode,
>  		*blk_nump += max_entries * max_entries * max_entries;
>  		return 0;
>  	}
> -
>  	bh = sb_bread(inode->i_sb, pblock);
>  	if (!bh)
>  		return -EIO;
> 
>  	i_data = (__le32 *)bh->b_data;
> -
>  	for (i = 0; i < max_entries; i++) {
>  		if (i_data[i]) {
>  			retval = update_dind_extent_range(handle, inode,
> @@ -228,7 +214,6 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode,
>  			blk_count += max_entries * max_entries;
>  		}
>  	}
> -
>  	/* Update the file block number */
>  	*blk_nump = blk_count;
>  	brelse(bh);
> @@ -236,7 +221,6 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode,
> 
>  }
> 
> -
>  static int free_dind_blocks(handle_t *handle,
>  				struct inode *inode, __le32 i_data)
>  {
> @@ -258,10 +242,7 @@ static int free_dind_blocks(handle_t *handle,
>  	}
>  	brelse(bh);
>  	ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
> -
>  	return 0;
> -
> -
>  }
> 
>  static int free_tind_blocks(handle_t *handle,
> @@ -277,7 +258,6 @@ static int free_tind_blocks(handle_t *handle,
>  		return -EIO;
> 
>  	tmp_idata = (__le32 *)bh->b_data;
> -
>  	for (i = 0; i < max_entries; i++) {
>  		if (tmp_idata[i]) {
>  			retval = free_dind_blocks(handle,
> @@ -290,10 +270,7 @@ static int free_tind_blocks(handle_t *handle,
>  	}
>  	brelse(bh);
>  	ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
> -
>  	return 0;
> -
> -
>  }
> 
>  static int free_ind_block(handle_t *handle, struct inode *inode)
> @@ -302,10 +279,8 @@ static int free_ind_block(handle_t *handle, struct inode *inode)
>  	struct ext4_inode_info *ei = EXT4_I(inode);
> 
>  	if (ei->i_data[EXT4_IND_BLOCK]) {
> -
>  		ext4_free_blocks(handle, inode,
>  				le32_to_cpu(ei->i_data[EXT4_IND_BLOCK]), 1, 1);
> -
>  	}
> 
>  	if (ei->i_data[EXT4_DIND_BLOCK]) {
> @@ -321,17 +296,15 @@ static int free_ind_block(handle_t *handle, struct inode *inode)
>  		if (retval)
>  			return retval;
>  	}
> -
> -
>  	return 0;
>  }
> +
>  static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
>  				struct inode *tmp_inode, int retval)
>  {
>  	struct ext4_inode_info *ei = EXT4_I(inode);
>  	struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode);
> 
> -
>  	retval = free_ind_block(handle, inode);
>  	if (retval)
>  		goto err_out;
> @@ -368,9 +341,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
>  	spin_unlock(&inode->i_lock);
> 
>  	ext4_mark_inode_dirty(handle, inode);
> -
>  err_out:
> -
>  	return retval;
>  }
> 
> @@ -392,7 +363,6 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
>  	struct buffer_head *bh;
>  	struct ext4_extent_header *eh;
> 
> -
>  	block = idx_pblock(ix);
>  	bh = sb_bread(inode->i_sb, block);
>  	if (!bh)
> @@ -400,24 +370,19 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
> 
>  	eh = (struct ext4_extent_header *)bh->b_data;
>  	if (eh->eh_depth == 0) {
> -
>  		brelse(bh);
>  		ext4_free_blocks(handle, inode, block, 1, 1);
> -
>  	} else {
> -
>  		ix = EXT_FIRST_INDEX(eh);
>  		for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
>  			retval = free_ext_idx(handle, inode, ix);
>  			if (retval)
>  				return retval;
>  		}
> -
>  	}
> -
>  	return retval;
> -
>  }
> +
>  /*
>   * Free the extent meta data blocks only
>   */
> @@ -439,10 +404,10 @@ static int free_ext_block(handle_t *handle, struct inode *inode)
>  		if (retval)
>  			return retval;
>  	}
> -
>  	return retval;
> 
>  }
> +
>  int ext4_ext_migrate(struct inode *inode, struct file *filp,
>  				unsigned int cmd, unsigned long arg)
>  {
> @@ -455,7 +420,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
>  	struct list_blocks_struct lb;
>  	unsigned long max_entries;
> 
> -
>  	if (!test_opt(inode->i_sb, EXTENTS)) {
>  		/*
>  		 * if mounted with noextents
> @@ -468,8 +432,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
>  		return -EINVAL;
> 
>  	down_write(&EXT4_I(inode)->i_data_sem);
> -
> -
>  	handle = ext4_journal_start(inode,
>  					EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
>  					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
> @@ -479,18 +441,15 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
>  		retval = PTR_ERR(handle);
>  		goto err_out;
>  	}
> -
>  	tmp_inode = ext4_new_inode(handle,
>  				inode->i_sb->s_root->d_inode,
>  				S_IFREG);
> -
>  	if (IS_ERR(tmp_inode)) {
>  		retval = -ENOMEM;
>  		ext4_journal_stop(handle);
>  		tmp_inode = NULL;
>  		goto err_out;
>  	}
> -
>  	i_size_write(tmp_inode, i_size_read(inode));
>  	/*
>  	 * We don't want the inode to be reclaimed
> @@ -523,7 +482,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
>  	 */
>  	handle = ext4_journal_start(inode, 1);
>  	for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) {
> -
>  		if (i_data[i]) {
>  			retval = update_extent_range(handle, tmp_inode,
>  						le32_to_cpu(i_data[i]),
> @@ -532,7 +490,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
>  				goto err_out;
>  		}
>  	}
> -
>  	if (i_data[EXT4_IND_BLOCK]) {
>  		retval = update_ind_extent_range(handle, tmp_inode,
>  					le32_to_cpu(i_data[EXT4_IND_BLOCK]),
> @@ -542,7 +499,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
>  	} else {
>  		blk_count +=  max_entries;
>  	}
> -
>  	if (i_data[EXT4_DIND_BLOCK]) {
>  		retval = update_dind_extent_range(handle, tmp_inode,
>  					le32_to_cpu(i_data[EXT4_DIND_BLOCK]),
> @@ -552,8 +508,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
>  	} else {
>  		blk_count += max_entries * max_entries;
>  	}
> -
> -
>  	if (i_data[EXT4_TIND_BLOCK]) {
>  		retval = update_tind_extent_range(handle, tmp_inode,
>  					le32_to_cpu(i_data[EXT4_TIND_BLOCK]),
> @@ -561,12 +515,10 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
>  			if (retval)
>  				goto err_out;
>  	}
> -
>  	/*
>  	 * Build the last extent
>  	 */
>  	retval = finish_range(handle, tmp_inode, &lb);
> -
>  err_out:
>  	/*
>  	 * We are either freeing extent information or indirect
> @@ -577,14 +529,12 @@ err_out:
>  	 *
>  	 * FIXME!! we may be touching bitmaps in different block groups.
>  	 */
> -
>  	if (ext4_journal_extend(handle,
>  			4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)) != 0) {
> 
>  		ext4_journal_restart(handle,
>  				4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb));
>  	}
> -
>  	if (retval) {
>  		/*
>  		 * Failure case delete the extent information with the
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index cf2f612..416d919 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -1841,13 +1841,14 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
>  	unsigned long stripe_width =
>  			le32_to_cpu(sbi->s_es->s_raid_stripe_width);
> 
> -	if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) {
> +	if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
>  		return sbi->s_stripe;
> -	} else if (stripe_width <= sbi->s_blocks_per_group) {
> +
> +	if (stripe_width <= sbi->s_blocks_per_group)
>  		return stripe_width;
> -	} else if (stride <= sbi->s_blocks_per_group) {
> +
> +	if (stride <= sbi->s_blocks_per_group)
>  		return stride;
> -	}
> 
>  	return 0;
>  }
> diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h
> index be4ada4..8bece0e 100644
> --- a/include/linux/ext4_fs_extents.h
> +++ b/include/linux/ext4_fs_extents.h
> @@ -225,6 +225,7 @@ static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
>  		(le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN));
>  }
> 
> +extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
>  extern int ext4_extent_tree_init(handle_t *, struct inode *);
>  extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
>  extern int ext4_ext_try_to_merge(struct inode *inode,
> -
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Patch queue update
@ 2008-06-15 17:21 Aneesh Kumar K.V
  2008-06-16 17:49 ` Aneesh Kumar K.V
  0 siblings, 1 reply; 11+ messages in thread
From: Aneesh Kumar K.V @ 2008-06-15 17:21 UTC (permalink / raw)
  To: Mingming Cao
  Cc: linux-ext4@vger.kernel.org, Theodore Tso, Eric Sandeen, Jan Kara

Hi Mingming,

Attached below are new patches for the patch queue. Some of the patches
update already-existing patches, mostly to get them to apply
cleanly after adding new patches, or to fix sparse warnings. The modified
patches should not have any functionality change. The diff between
the old patch queue and the updated queue is attached below; that should
make it easy to review the changes. The patch queue reviewed is
as of commit 22d4c3124d50803222c14116b3fdf08dc447a119

The updated series file
http://www.radian.org/~kvaneesh/ext4/jun-15-2008/series
The patches
http://www.radian.org/~kvaneesh/ext4/jun-15-2008/
The complete patchset.
http://www.radian.org/~kvaneesh/ext4/jun-15-2008/patches.tar.gz


-aneesh


diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 03a168f..7315adc 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -47,7 +47,7 @@ static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block,
 			ext4_group_t block_group)
 {
 	ext4_group_t actual_group;
-	ext4_get_group_no_and_offset(sb, block, &actual_group, 0);
+	ext4_get_group_no_and_offset(sb, block, &actual_group, NULL);
 	if (actual_group == block_group)
 		return 1;
 	return 0;
@@ -538,7 +538,7 @@ void ext4_rsv_window_add(struct super_block *sb,
  * from the filesystem reservation window rb tree. Must be called with
  * rsv_lock hold.
  */
-void rsv_window_remove(struct super_block *sb,
+static void rsv_window_remove(struct super_block *sb,
 			      struct ext4_reserve_window_node *rsv)
 {
 	rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
@@ -1706,7 +1706,12 @@ ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
 	}
 
 	sbi = EXT4_SB(sb);
-	*count = ext4_has_free_blocks(sbi, *count);
+	if (!EXT4_I(inode)->i_delalloc_reserved_flag) {
+		/*
+		 * With delalloc we already reserved the blocks
+		 */
+		*count = ext4_has_free_blocks(sbi, *count);
+	}
 	if (*count == 0) {
 		*errp = -ENOSPC;
 		return 0;	/*return with ENOSPC error */
@@ -1907,7 +1912,8 @@ ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
 	le16_add_cpu(&gdp->bg_free_blocks_count, -num);
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
 	spin_unlock(sb_bgl_lock(sbi, group_no));
-	percpu_counter_sub(&sbi->s_freeblocks_counter, num);
+	if (!EXT4_I(inode)->i_delalloc_reserved_flag)
+		percpu_counter_sub(&sbi->s_freeblocks_counter, num);
 
 	if (sbi->s_log_groups_per_flex) {
 		ext4_group_t flex_group = ext4_flex_group(sbi, group_no);
@@ -1977,52 +1983,53 @@ static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode,
 
 	ret = ext4_mb_new_blocks(handle, &ar, errp);
 	*count = ar.len;
-	/*
-	 * Account for the allocated meta blocks
-	 */
-	if (!(*errp) && (flags & EXT4_META_BLOCK)) {
-		spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
-		EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
-		spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
-	}
 	return ret;
 }
 
 /*
- * ext4_new_meta_block() -- allocate block for meta data (indexing) blocks
+ * ext4_new_meta_blocks() -- allocate block for meta data (indexing) blocks
  *
  * @handle:             handle to this transaction
  * @inode:              file inode
  * @goal:               given target block(filesystem wide)
+ * @count:		total number of blocks need
  * @errp:               error code
  *
- * Return allocated block number on success
+ * Return 1st allocated block numberon success, *count stores total account
+ * error stores in errp pointer
  */
-ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
-		ext4_fsblk_t goal, int *errp)
+ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
+		ext4_fsblk_t goal, unsigned long *count, int *errp)
 {
-	unsigned long count = 1;
-	return do_blk_alloc(handle, inode, 0, goal,
-			&count, errp, EXT4_META_BLOCK);
+	ext4_fsblk_t ret;
+	ret = do_blk_alloc(handle, inode, 0, goal,
+				count, errp, EXT4_META_BLOCK);
+	/*
+	 * Account for the allocated meta blocks
+	 */
+	if (!(*errp)) {
+		spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+		EXT4_I(inode)->i_allocated_meta_blocks += *count;
+		spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+	}
+	return ret;
 }
 
 /*
- * ext4_new_meta_blocks() -- allocate block for meta data (indexing) blocks
+ * ext4_new_meta_block() -- allocate block for meta data (indexing) blocks
  *
  * @handle:             handle to this transaction
  * @inode:              file inode
  * @goal:               given target block(filesystem wide)
- * @count:		total number of blocks need
  * @errp:               error code
  *
- * Return 1st allocated block numberon success, *count stores total account
- * error stores in errp pointer
+ * Return allocated block number on success
  */
-ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
-		ext4_fsblk_t goal, unsigned long *count, int *errp)
+ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
+		ext4_fsblk_t goal, int *errp)
 {
-	return do_blk_alloc(handle, inode, 0, goal,
-			count, errp, EXT4_META_BLOCK);
+	unsigned long count = 1;
+	return ext4_new_meta_blocks(handle, inode, goal, &count, errp);
 }
 
 /*
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index fdd8983..92d3aab 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -592,11 +592,15 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
 	}
 
 	path[ppos].p_depth = i;
+	path[ppos].p_ext = NULL;
 	path[ppos].p_idx = NULL;
 
 	/* find extent */
 	ext4_ext_binsearch(inode, path + ppos, block);
-	path[ppos].p_block = ext_pblock(path[ppos].p_ext);
+	/* if not an empty leaf */
+	if (path[ppos].p_ext)
+		path[ppos].p_block = ext_pblock(path[ppos].p_ext);
+
 
 	ext4_ext_show_path(inode, path);
 
@@ -3120,7 +3124,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
 	struct fiemap		*fiemap_s;
 	struct fiemap_extent	fm_extent;
 	size_t			tot_mapping_len;
-	char			*cur_ext_ptr;
+	char __user		*cur_ext_ptr;
 	int			current_extent;
 	int			err;
 };
@@ -3128,7 +3132,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
 /*
  * Callback function called for each extent to gather FIEMAP information.
  */
-int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
+static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
 		       struct ext4_ext_cache *newex, struct ext4_extent *ex,
 		       void *data)
 {
@@ -3252,7 +3256,7 @@ int ext4_fiemap(struct inode *inode, unsigned long arg)
 	start_blk = fiemap_s->fm_start >> inode->i_sb->s_blocksize_bits;
 	fiemap_i.fiemap_s = fiemap_s;
 	fiemap_i.tot_mapping_len = 0;
-	fiemap_i.cur_ext_ptr = (char *)(arg + sizeof(*fiemap_s));
+	fiemap_i.cur_ext_ptr = (char __user *)(arg + sizeof(*fiemap_s));
 	fiemap_i.current_extent = 0;
 	fiemap_i.err = 0;
 
@@ -3277,18 +3281,18 @@ int ext4_fiemap(struct inode *inode, unsigned long arg)
 	if (fiemap_i.current_extent != 0 &&
 	    fiemap_i.current_extent < fiemap_s->fm_extent_count &&
 	    !(fiemap_s->fm_flags & FIEMAP_FLAG_NUM_EXTENTS)) {
-		char *dest;
+		char __user *dest;
 
 		last_extent = &fiemap_i.fm_extent;
 		last_extent->fe_flags |= FIEMAP_EXTENT_LAST;
-		dest = (char *)arg + sizeof(*fiemap_s) + fm_extent_size *
+		dest = (char __user *)arg + sizeof(*fiemap_s) + fm_extent_size *
 						(fiemap_s->fm_extent_count - 1);
 		err = copy_to_user(dest, last_extent, fm_extent_size);
 		if (err)
 			goto out_free;
 	}
 
-	err = copy_to_user((void *)arg, fiemap_s, sizeof(*fiemap_s));
+	err = copy_to_user((void __user *)arg, fiemap_s, sizeof(*fiemap_s));
 
 out_free:
 	kfree(fiemap_s);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 7823405..dc8bfc4 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -820,7 +820,6 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
 	ei->i_state = EXT4_STATE_NEW;
 
 	ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
-	jbd2_journal_init_jbd_inode(&ei->jinode, inode);
 
 	ret = inode;
 	if(DQUOT_ALLOC_INODE(inode)) {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5aee4b0..fef2574 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -525,7 +525,7 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
 				ext4_fsblk_t new_blocks[4], int *err)
 {
 	int target, i;
-	long count = 0, blk_allocated = 0;
+	unsigned long count = 0, blk_allocated = 0;
 	int index = 0;
 	ext4_fsblk_t current_block = 0;
 	int ret = 0;
@@ -1561,7 +1561,7 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
 	handle_t *handle = NULL;
 
 	handle = ext4_journal_current_handle();
-	BUG_ON(handle == 0);
+	BUG_ON(handle == NULL);
 	BUG_ON(create == 0);
 
 	ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
@@ -1606,11 +1606,12 @@ static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
 	return !buffer_mapped(bh) || buffer_delay(bh);
 }
 
-/* FIXME!! only support data=writeback mode */
 /*
  * get called vi ext4_da_writepages after taking page lock
  * We may end up doing block allocation here in case
  * mpage_da_map_blocks failed to allocate blocks.
+ *
+ * We also get called via journal_submit_inode_data_buffers
  */
 static int ext4_da_writepage(struct page *page,
 				struct writeback_control *wbc)
@@ -1629,6 +1630,7 @@ static int ext4_da_writepage(struct page *page,
 		 * ext4_da_writepages() but directly (shrink_page_list).
 		 * We cannot easily start a transaction here so we just skip
 		 * writing the page in case we would have to do so.
+		 * We reach here also via journal_submit_inode_data_buffers
 		 */
 		size = i_size_read(inode);
 
@@ -1644,8 +1646,11 @@ static int ext4_da_writepage(struct page *page,
 			 * We can't do block allocation under
 			 * page lock without a handle . So redirty
 			 * the page and return
+			 * We may reach here when we do a journal commit
+			 * via journal_submit_inode_data_buffers.
+			 * If we don't have a mapping for the blocks
+			 * we just ignore them
 			 */
-			BUG_ON(wbc->sync_mode != WB_SYNC_NONE);
 			redirty_page_for_writepage(wbc, page);
 			unlock_page(page);
 			return 0;
@@ -1660,7 +1665,6 @@ static int ext4_da_writepage(struct page *page,
 	return ret;
 }
 
-
 /*
  * For now just follow the DIO way to estimate the max credits
  * needed to write out EXT4_MAX_WRITEBACK_PAGES.
@@ -1693,7 +1697,7 @@ static int ext4_da_writepages(struct address_space *mapping,
 		return 0;
 
 	/*
-	 *  Estimate the worse case needed credits to write out
+	 * Estimate the worse case needed credits to write out
 	 * EXT4_MAX_BUF_BLOCKS pages
 	 */
 	needed_blocks = EXT4_MAX_WRITEBACK_CREDITS;
@@ -1715,6 +1719,19 @@ static int ext4_da_writepages(struct address_space *mapping,
 			ret = PTR_ERR(handle);
 			goto out_writepages;
 		}
+		if (ext4_should_order_data(inode)) {
+			/*
+			 * With ordered mode we need to add
+			 * the inode to the journal handle
+			 * when we do block allocation.
+			 */
+			ret = ext4_jbd2_file_inode(handle, inode);
+			if (ret) {
+				ext4_journal_stop(handle);
+				goto out_writepages;
+			}
+
+		}
 		/*
 		 * set the max dirty pages could be write at a time
 		 * to fit into the reserved transaction credits
@@ -1749,15 +1766,17 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
 				loff_t pos, unsigned len, unsigned flags,
 				struct page **pagep, void **fsdata)
 {
-	int ret;
+	int ret, retries = 0;
 	struct page *page;
 	pgoff_t index;
 	unsigned from, to;
+	struct inode *inode = mapping->host;
 
 	index = pos >> PAGE_CACHE_SHIFT;
 	from = pos & (PAGE_CACHE_SIZE - 1);
 	to = from + len;
 
+retry:
 	page = __grab_cache_page(mapping, index);
 	if (!page)
 		return -ENOMEM;
@@ -1769,6 +1788,8 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
 		unlock_page(page);
 		page_cache_release(page);
 	}
+	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+		goto retry;
 
 	return ret;
 }
@@ -2228,7 +2249,10 @@ static int ext4_journalled_set_page_dirty(struct page *page)
 
 void ext4_set_aops(struct inode *inode)
 {
-	if (ext4_should_order_data(inode))
+	if (ext4_should_order_data(inode) &&
+		test_opt(inode->i_sb, DELALLOC))
+		inode->i_mapping->a_ops = &ext4_da_aops;
+	else if (ext4_should_order_data(inode))
 		inode->i_mapping->a_ops = &ext4_ordered_aops;
 	else if (ext4_should_writeback_data(inode) &&
 		 test_opt(inode->i_sb, DELALLOC))
@@ -3887,18 +3911,6 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
 	return err;
 }
 
-static int ext4_bh_prepare_fill(handle_t *handle, struct buffer_head *bh)
-{
-	if (!buffer_mapped(bh)) {
-		/*
-		 * Mark buffer as dirty so that
-		 * block_write_full_page() writes it
-		 */
-		set_buffer_dirty(bh);
-	}
-	return 0;
-}
-
 static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh)
 {
 	return !buffer_mapped(bh);
@@ -3908,13 +3920,10 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 {
 	loff_t size;
 	unsigned long len;
-	int err, ret = -EINVAL;
-	handle_t *handle;
+	int ret = -EINVAL;
 	struct file *file = vma->vm_file;
 	struct inode *inode = file->f_path.dentry->d_inode;
 	struct address_space *mapping = inode->i_mapping;
-	struct writeback_control wbc = { .sync_mode = WB_SYNC_NONE,
-					 .nr_to_write = 1 };
 
 	/*
 	 * Get i_alloc_sem to stop truncates messing with the inode. We cannot
@@ -3941,38 +3950,23 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 		if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
 				       ext4_bh_unmapped))
 			goto out_unlock;
-		/*
-		 * Now mark all the  buffer head dirty so
-		 * that writepage can write it
-		 */
-		walk_page_buffers(NULL, page_buffers(page), 0, len,
-					NULL, ext4_bh_prepare_fill);
 	}
 	/*
-	 * OK, we need to fill the hole... Lock the page and do writepage.
-	 * We can't do write_begin and write_end here because we don't
-	 * have inode_mutex and that allow parallel write_begin, write_end call.
-	 * (lock_page prevent this from happening on the same page though)
+	 * OK, we need to fill the hole... Do write_begin/write_end
+	 * to do block allocation/reservation. We are not holding
+	 * inode->i_mutex here. That allows parallel write_begin,
+	 * write_end calls. lock_page prevents this from happening
+	 * on the same page though
 	 */
-	handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
+	ret = mapping->a_ops->write_begin(file, mapping, page_offset(page),
+			len, AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+	if (ret < 0)
 		goto out_unlock;
-	}
-	lock_page(page);
-	wbc.range_start = page_offset(page);
-	wbc.range_end = page_offset(page) + len;
-	if (!ext4_should_journal_data(inode)) {
-		ret = __ext4_normal_writepage(page, &wbc);
-		if (!ret && ext4_should_order_data(inode))
-			ret = ext4_jbd2_file_inode(handle, inode);
-	} else {
-		ret = __ext4_journalled_writepage(page, &wbc);
-	}
-	/* Page got unlocked in writepage */
-	err = ext4_journal_stop(handle);
-	if (!ret)
-		ret = err;
+	ret = mapping->a_ops->write_end(file, mapping, page_offset(page),
+			len, len, page, NULL);
+	if (ret < 0)
+		goto out_unlock;
+	ret = 0;
 out_unlock:
 	up_read(&inode->i_alloc_sem);
 	return ret;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6fa08ca..fde1ae9 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -578,6 +578,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 	ei->i_allocated_meta_blocks = 0;
 	ei->i_delalloc_reserved_flag = 0;
 	spin_lock_init(&(ei->i_block_reservation_lock));
+	jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
 	return &ei->vfs_inode;
 }
 
@@ -1878,8 +1879,8 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
 }
 
 static int ext4_fill_super (struct super_block *sb, void *data, int silent)
-				__releases(kernel_sem)
-				__acquires(kernel_sem)
+				__releases(kernel_lock)
+				__acquires(kernel_lock)
 
 {
 	struct buffer_head * bh;
@@ -1996,7 +1997,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 	 * Use -o nodelalloc to turn it off
 	 */
 	set_opt(sbi->s_mount_opt, DELALLOC);
-	set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
 
 
 	if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 483183d..32ca3c3 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -22,6 +22,8 @@
 #include <linux/pagemap.h>
 #include <linux/jiffies.h>
 #include <linux/crc32.h>
+#include <linux/writeback.h>
+#include <linux/backing-dev.h>
 
 /*
  * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -185,6 +187,30 @@ static int journal_wait_on_commit_record(struct buffer_head *bh)
 }
 
 /*
+ * Write the filemap data using the writepage() address_space_operation.
+ * We don't do block allocation here even for delalloc. We don't
+ * use writepages() because with delayed allocation we may be doing
+ * block allocation in writepages().
+ */
+static int journal_submit_inode_data_buffers(struct address_space *mapping)
+{
+	int ret;
+	struct writeback_control wbc = {
+		.sync_mode =  WB_SYNC_ALL,
+		.nr_to_write = mapping->nrpages * 2,
+		.range_start = 0,
+		.range_end = i_size_read(mapping->host),
+		.for_writepages = 1,
+	};
+
+	if (!mapping_cap_writeback_dirty(mapping))
+		return 0;
+
+	ret = generic_writepages(mapping, &wbc);
+	return ret;
+}
+
+/*
  * Submit all the data buffers of inode associated with the transaction to
  * disk.
  *
@@ -192,7 +218,7 @@ static int journal_wait_on_commit_record(struct buffer_head *bh)
  * our inode list. We use JI_COMMIT_RUNNING flag to protect inode we currently
  * operate on from being released while we write out pages.
  */
-static int journal_submit_inode_data_buffers(journal_t *journal,
+static int journal_submit_data_buffers(journal_t *journal,
 		transaction_t *commit_transaction)
 {
 	struct jbd2_inode *jinode;
@@ -204,8 +230,13 @@ static int journal_submit_inode_data_buffers(journal_t *journal,
 		mapping = jinode->i_vfs_inode->i_mapping;
 		jinode->i_flags |= JI_COMMIT_RUNNING;
 		spin_unlock(&journal->j_list_lock);
-		err = filemap_fdatawrite_range(mapping, 0,
-					i_size_read(jinode->i_vfs_inode));
+		/*
+		 * Submit the inode data buffers. We use writepage
+		 * instead of writepages, because writepages can do
+		 * block allocation with delalloc. We need to write
+		 * only allocated blocks here.
+		 */
+		err = journal_submit_inode_data_buffers(mapping);
 		if (!ret)
 			ret = err;
 		spin_lock(&journal->j_list_lock);
@@ -228,7 +259,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
 	struct jbd2_inode *jinode, *next_i;
 	int err, ret = 0;
 
-	/* For locking, see the comment in journal_submit_inode_data_buffers() */
+	/* For locking, see the comment in journal_submit_data_buffers() */
 	spin_lock(&journal->j_list_lock);
 	list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
 		jinode->i_flags |= JI_COMMIT_RUNNING;
@@ -431,7 +462,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	 * Now start flushing things to disk, in the order they appear
 	 * on the transaction lists.  Data blocks go first.
 	 */
-	err = journal_submit_inode_data_buffers(journal, commit_transaction);
+	err = journal_submit_data_buffers(journal, commit_transaction);
 	if (err)
 		jbd2_journal_abort(journal, err);
 

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: Patch queue update
  2008-06-15 17:21 Aneesh Kumar K.V
@ 2008-06-16 17:49 ` Aneesh Kumar K.V
  2008-06-16 22:03   ` Mingming
  0 siblings, 1 reply; 11+ messages in thread
From: Aneesh Kumar K.V @ 2008-06-16 17:49 UTC (permalink / raw)
  To: Mingming Cao
  Cc: linux-ext4@vger.kernel.org, Theodore Tso, Eric Sandeen, Jan Kara

On Sun, Jun 15, 2008 at 10:51:36PM +0530, Aneesh Kumar K.V wrote:
> Hi Mingming,
> 
> Attached below are new patches for the patchqueue. Some of patches
> update already existing patches, mostly to get them apply
> cleanly after adding new patches or to fix sparse warning. The modified
> patches should not have any functionality change. The diff between
> the patchqueue and updated queue is attached below. That should help
> to look at the changes easily.  The patch queue looked at is
> as of commit 22d4c3124d50803222c14116b3fdf08dc447a119
> 
> The updated series file
> http://www.radian.org/~kvaneesh/ext4/jun-15-2008/series
> The patches
> http://www.radian.org/~kvaneesh/ext4/jun-15-2008/
> The complete patchset.
> http://www.radian.org/~kvaneesh/ext4/jun-15-2008/patches.tar.gz
> 
> 


Update for 12-ext4-jinode-hang.patch after review.

ext4: Fix file system hang due to corrupt jinode values.

From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

While committing the transaction we set the i_flags
of jbd2_inode to JI_COMMIT_RUNNING. This is needed to ensure
that when we are committing the transaction we don't release
the inode via iput. In ext4_clear_inode we wait for the
flag to be cleared before we release the inode.

In ext4_new_inode we call iput for different failure cases
before initializing the jbd2_inode. This causes the failure
case to hang in iput as shown below. Fix the same by initializing
jbd2_inode early.

 [<c022d20e>] ? prepare_to_wait+0x49/0x4e
 [<c02cf430>] jbd2_journal_release_jbd_inode+0x86/0xcc
 [<c022d0e4>] ? wake_bit_function+0x0/0x43
 [<c02b329b>] ext4_clear_inode+0x45/0x4a
 [<c02748f7>] clear_inode+0x5f/0xae
 [<c0274e8f>] generic_drop_inode+0x119/0x13a
 [<c0274071>] iput+0x4b/0x4e
 [<c02ab200>] ext4_new_inode+0xb29/0xb7a
 [<c0272761>] ? dput+0x19/0xdc
 [<c02cca5f>] ? jbd2_journal_start+0x8c/0xb9
 [<c02b1cb4>] ext4_create+0x81/0xe7
 [<c02b1c33>] ? ext4_create+0x0/0xe7
 [<c026aa3c>] vfs_create+0x6c/0xdb
 [<c026cfd6>] do_filp_open+0x1a6/0x6b5
 [<c026284e>] do_sys_open+0x40/0xb6
 [<c0262906>] sys_open+0x1e/0x26
 [<c0202be6>] syscall_call+0x7/0xb

Reported-by: Holger Kiehl <Holger.Kiehl@dwd.de>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Jan Kara <jack@suse.cz>
---

 fs/ext4/ialloc.c  |    1 -
 fs/ext4/inode.c   |    1 -
 fs/ext4/mballoc.c |    2 --
 fs/ext4/super.c   |    1 +
 4 files changed, 1 insertions(+), 4 deletions(-)


diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 7823405..dc8bfc4 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -820,7 +820,6 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
 	ei->i_state = EXT4_STATE_NEW;
 
 	ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
-	jbd2_journal_init_jbd_inode(&ei->jinode, inode);
 
 	ret = inode;
 	if(DQUOT_ALLOC_INODE(inode)) {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index fef2574..43d67e0 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3125,7 +3125,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 	ei->i_default_acl = EXT4_ACL_NOT_CACHED;
 #endif
 	ei->i_block_alloc_info = NULL;
-	jbd2_journal_init_jbd_inode(&ei->jinode, inode);
 
 	ret = __ext4_get_inode_loc(inode, &iloc, 0);
 	if (ret < 0)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 034f00f..e511b95 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2259,8 +2259,6 @@ static int ext4_mb_init_backend(struct super_block *sb)
 		printk(KERN_ERR "EXT4-fs: can't get new inode\n");
 		goto err_freesgi;
 	}
-	jbd2_journal_init_jbd_inode(&EXT4_I(sbi->s_buddy_cache)->jinode,
-				    sbi->s_buddy_cache);
 	EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
 
 	metalen = sizeof(*meta_group_info) << EXT4_DESC_PER_BLOCK_BITS(sb);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e207555..fde1ae9 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -578,6 +578,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 	ei->i_allocated_meta_blocks = 0;
 	ei->i_delalloc_reserved_flag = 0;
 	spin_lock_init(&(ei->i_block_reservation_lock));
+	jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
 	return &ei->vfs_inode;
 }
 

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: Patch queue update
  2008-06-16 17:49 ` Aneesh Kumar K.V
@ 2008-06-16 22:03   ` Mingming
  0 siblings, 0 replies; 11+ messages in thread
From: Mingming @ 2008-06-16 22:03 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: linux-ext4@vger.kernel.org, Theodore Tso, Eric Sandeen, Jan Kara


On Mon, 2008-06-16 at 23:19 +0530, Aneesh Kumar K.V wrote:
> On Sun, Jun 15, 2008 at 10:51:36PM +0530, Aneesh Kumar K.V wrote:
> > Hi Mingming,
> > 
> > Attached below are new patches for the patchqueue. Some of patches
> > update already existing patches, mostly to get them apply
> > cleanly after adding new patches or to fix sparse warning. The modified
> > patches should not have any functionality change. The diff between
> > the patchqueue and updated queue is attached below. That should help
> > to look at the changes easily.  The patch queue looked at is
> > as of commit 22d4c3124d50803222c14116b3fdf08dc447a119
> > 
> > The updated series file
> > http://www.radian.org/~kvaneesh/ext4/jun-15-2008/series
> > The patches
> > http://www.radian.org/~kvaneesh/ext4/jun-15-2008/
> > The complete patchset.
> > http://www.radian.org/~kvaneesh/ext4/jun-15-2008/patches.tar.gz
> > 
> > 
> 

I have updated the patch queue with your changes, and place them in the
right location.

Mingming
> Update for 12-ext4-jinode-hang.patch after review.
> 
> ext4: Fix file system hang due to corrupt jinode values.
> 
> From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> 
> While commiting the transaction we set the i_flags
> of jbd2_inode to JI_COMMIT_RUNNING. This is needed to ensure
> that when we are commiting the transaction we don't release
> the inode via iput. In ext4_clear_inode we wait for the
> flag to be cleared before we release the inode.
> 
> In ext4_new_inode we call iput for different failure cases
> before initializing the jbd2_inode. This cause the failure
> case to hang in iput as show below. Fix the same by initializing
> jbd2_inode early.
> 
>  [<c022d20e>] ? prepare_to_wait+0x49/0x4e
>  [<c02cf430>] jbd2_journal_release_jbd_inode+0x86/0xcc
>  [<c022d0e4>] ? wake_bit_function+0x0/0x43
>  [<c02b329b>] ext4_clear_inode+0x45/0x4a
>  [<c02748f7>] clear_inode+0x5f/0xae
>  [<c0274e8f>] generic_drop_inode+0x119/0x13a
>  [<c0274071>] iput+0x4b/0x4e
>  [<c02ab200>] ext4_new_inode+0xb29/0xb7a
>  [<c0272761>] ? dput+0x19/0xdc
>  [<c02cca5f>] ? jbd2_journal_start+0x8c/0xb9
>  [<c02b1cb4>] ext4_create+0x81/0xe7
>  [<c02b1c33>] ? ext4_create+0x0/0xe7
>  [<c026aa3c>] vfs_create+0x6c/0xdb
>  [<c026cfd6>] do_filp_open+0x1a6/0x6b5
>  [<c026284e>] do_sys_open+0x40/0xb6
>  [<c0262906>] sys_open+0x1e/0x26
>  [<c0202be6>] syscall_call+0x7/0xb
> 
> Reported-by: Holger Kiehl <Holger.Kiehl@dwd.de>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> Acked-by: Jan Kara <jack@suse.cz>
> ---
> 
>  fs/ext4/ialloc.c  |    1 -
>  fs/ext4/inode.c   |    1 -
>  fs/ext4/mballoc.c |    2 --
>  fs/ext4/super.c   |    1 +
>  4 files changed, 1 insertions(+), 4 deletions(-)
> 
> 
> diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
> index 7823405..dc8bfc4 100644
> --- a/fs/ext4/ialloc.c
> +++ b/fs/ext4/ialloc.c
> @@ -820,7 +820,6 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
>  	ei->i_state = EXT4_STATE_NEW;
> 
>  	ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
> -	jbd2_journal_init_jbd_inode(&ei->jinode, inode);
> 
>  	ret = inode;
>  	if(DQUOT_ALLOC_INODE(inode)) {
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index fef2574..43d67e0 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -3125,7 +3125,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
>  	ei->i_default_acl = EXT4_ACL_NOT_CACHED;
>  #endif
>  	ei->i_block_alloc_info = NULL;
> -	jbd2_journal_init_jbd_inode(&ei->jinode, inode);
> 
>  	ret = __ext4_get_inode_loc(inode, &iloc, 0);
>  	if (ret < 0)
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 034f00f..e511b95 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -2259,8 +2259,6 @@ static int ext4_mb_init_backend(struct super_block *sb)
>  		printk(KERN_ERR "EXT4-fs: can't get new inode\n");
>  		goto err_freesgi;
>  	}
> -	jbd2_journal_init_jbd_inode(&EXT4_I(sbi->s_buddy_cache)->jinode,
> -				    sbi->s_buddy_cache);
>  	EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
> 
>  	metalen = sizeof(*meta_group_info) << EXT4_DESC_PER_BLOCK_BITS(sb);
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index e207555..fde1ae9 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -578,6 +578,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
>  	ei->i_allocated_meta_blocks = 0;
>  	ei->i_delalloc_reserved_flag = 0;
>  	spin_lock_init(&(ei->i_block_reservation_lock));
> +	jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
>  	return &ei->vfs_inode;
>  }
> 


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2008-06-16 22:02 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-01-24 14:50 Patch queue update Aneesh Kumar K.V
2008-01-24 16:26 ` Andreas Dilger
2008-01-24 16:32   ` Eric Sandeen
2008-01-24 19:50 ` Mingming Cao
  -- strict thread matches above, loose matches on Subject: below --
2008-06-15 17:21 Aneesh Kumar K.V
2008-06-16 17:49 ` Aneesh Kumar K.V
2008-06-16 22:03   ` Mingming
2008-01-10 15:33 patch " Aneesh Kumar K.V
2008-01-10 21:43 ` Andreas Dilger
2008-01-11  4:09   ` Aneesh Kumar K.V
2007-12-24  6:30 Patch " Aneesh Kumar K.V

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox