public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Ext3: Retry allocation after transaction commit (v2)
@ 2004-05-17  2:12 Theodore Ts'o
  0 siblings, 0 replies; only message in thread
From: Theodore Ts'o @ 2004-05-17  2:12 UTC (permalink / raw)
  To: Andrew Morton; +Cc: adilger, linux-kernel, ext2-devel

[-- Attachment #1: Type: text/plain, Size: 928 bytes --]

Here is a reworked version of my patch to ext3 to retry certain
filesystem operations after an ENOSPC error.  The
ext3_should_retry_alloc() function will not wait on the currently
running transaction if there is a currently active handle; hence this
should avoid deadlocks in the Lustre use case.  The patch is versus
BK-recent.

I've also included a simple, reliable test case which demonstrates the
problem this patch is intended to fix.  (Note that BK-recent is not
sufficient to address this test case, and waiting on the commiting
transaction in ext3_new_block is also not sufficient.  Been there,
tried that, didn't work.  We need to do the full-bore retry from the
top level.  The ext3_should_retry_alloc() will only wait on the
committing transaction if there is an active handle; hence Lustre will
probably also need to use ext3_should_retry_alloc() if it wants to
reliably avoid this particular problem.)

						- Ted

[-- Attachment #2: retry.patch --]
[-- Type: text/plain, Size: 10384 bytes --]

# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
#   2004/05/16 17:29:49-04:00 tytso@think.thunk.org 
#   Adds retry logic to the ext3 filesystem in the case where forcing 
#   the current transaction out to disk may cause the filesystem operation
#   to succeed.
# 
# include/linux/ext3_fs.h
#   2004/05/16 12:39:52-04:00 tytso@think.thunk.org +1 -0
#   Add function prototype for ext3_should_retry_alloc()
# 
# fs/ext3/xattr.c
#   2004/05/16 12:39:52-04:00 tytso@think.thunk.org +5 -1
#   Add retry logic in the case of ENOSPC.
# 
# fs/ext3/namei.c
#   2004/05/16 12:39:52-04:00 tytso@think.thunk.org +20 -5
#   Add retry logic in the case of ENOSPC.
# 
# fs/ext3/inode.c
#   2004/05/16 12:39:52-04:00 tytso@think.thunk.org +5 -14
#   Add retry logic in the case of ENOSPC.
# 
# fs/ext3/balloc.c
#   2004/05/16 12:39:52-04:00 tytso@think.thunk.org +56 -6
#   Factor out the check for free blocks into ext3_has_free_blocks().
#   
#   New function, ext3_should_retry_alloc() which centralizes logic for
#   determining whether or not the top-level filesystem code should 
#   retry the operation.  This function also will wait until the 
#   current or commiting transaction is completed in the case where
#   retrying is indicated.
# 
# fs/ext3/acl.c
#   2004/05/16 12:39:52-04:00 tytso@think.thunk.org +9 -1
#   Add retry logic in the case of ENOSPC.
# 
diff -Nru a/fs/ext3/acl.c b/fs/ext3/acl.c
--- a/fs/ext3/acl.c	Sun May 16 17:36:00 2004
+++ b/fs/ext3/acl.c	Sun May 16 17:36:00 2004
@@ -428,7 +428,9 @@
 	error = posix_acl_chmod_masq(clone, inode->i_mode);
 	if (!error) {
 		handle_t *handle;
+		int retries = 0;
 
+	retry:
 		handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS);
 		if (IS_ERR(handle)) {
 			error = PTR_ERR(handle);
@@ -437,6 +439,9 @@
 		}
 		error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, clone);
 		ext3_journal_stop(handle);
+		if (error == -ENOSPC && 
+		    ext3_should_retry_alloc(inode->i_sb, &retries))
+			goto retry;
 	}
 out:
 	posix_acl_release(clone);
@@ -516,7 +521,7 @@
 {
 	handle_t *handle;
 	struct posix_acl *acl;
-	int error;
+	int error, retries = 0;
 
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return -EOPNOTSUPP;
@@ -535,11 +540,14 @@
 	} else
 		acl = NULL;
 
+retry:
 	handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS);
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	error = ext3_set_acl(handle, inode, type, acl);
 	ext3_journal_stop(handle);
+	if (error == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
+		goto retry;
 
 release_and_out:
 	posix_acl_release(acl);
diff -Nru a/fs/ext3/balloc.c b/fs/ext3/balloc.c
--- a/fs/ext3/balloc.c	Sun May 16 17:36:00 2004
+++ b/fs/ext3/balloc.c	Sun May 16 17:36:00 2004
@@ -465,6 +465,60 @@
 	return -1;
 }
 
+static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
+{
+	int free_blocks, root_blocks;
+
+	free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
+	root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
+	if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
+		sbi->s_resuid != current->fsuid &&
+		(sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Ext3_should_retry_alloc is called when ENOSPC is returned, and if
+ * it is profitable to retry the operation, this function will wait
+ * for the current or commiting transaction to complete, and then
+ * return TRUE.
+ */
+int ext3_should_retry_alloc(struct super_block *sb, int *retries)
+{
+	transaction_t *transaction = NULL;
+	journal_t *journal = EXT3_SB(sb)->s_journal;
+	tid_t tid;
+
+	if (!ext3_has_free_blocks(EXT3_SB(sb)) || (*retries)++ > 3)
+		return 0;
+
+	jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
+
+	/* 
+	 * We can only force the running transaction if we don't have
+	 * an active handle; otherwise, we will deadlock.
+	 */
+	spin_lock(&journal->j_state_lock);
+	if (journal->j_running_transaction && !current->journal_info) {
+		transaction = journal->j_running_transaction;
+		__log_start_commit(journal, transaction->t_tid);
+	} else if (journal->j_committing_transaction)
+		transaction = journal->j_committing_transaction;
+
+	if (!transaction) {
+		spin_unlock(&journal->j_state_lock);
+		return 0;	/* Nothing to retry */
+	}
+		
+	tid = transaction->t_tid;
+	spin_unlock(&journal->j_state_lock);
+	log_wait_commit(journal, tid);
+
+	return 1;
+}
+
 /*
  * ext3_new_block uses a goal block to assist allocation.  If the goal is
  * free, or there is a free block within 32 blocks of the goal, that block
@@ -485,7 +539,7 @@
 	int target_block;			/* tmp */
 	int fatal = 0, err;
 	int performed_allocation = 0;
-	int free_blocks, root_blocks;
+	int free_blocks;
 	struct super_block *sb;
 	struct ext3_group_desc *gdp;
 	struct ext3_super_block *es;
@@ -512,11 +566,7 @@
 	es = EXT3_SB(sb)->s_es;
 	ext3_debug("goal=%lu.\n", goal);
 
-	free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
-	root_blocks = le32_to_cpu(es->s_r_blocks_count);
-	if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
-		sbi->s_resuid != current->fsuid &&
-		(sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
+	if (!ext3_has_free_blocks(sbi)) {
 		*errp = -ENOSPC;
 		goto out;
 	}
diff -Nru a/fs/ext3/inode.c b/fs/ext3/inode.c
--- a/fs/ext3/inode.c	Sun May 16 17:36:00 2004
+++ b/fs/ext3/inode.c	Sun May 16 17:36:00 2004
@@ -1081,7 +1081,7 @@
 	struct inode *inode = page->mapping->host;
 	int ret, needed_blocks = ext3_writepage_trans_blocks(inode);
 	handle_t *handle;
-	int tried_commit = 0;
+	int retries = 0;
 
 retry:
 	handle = ext3_journal_start(inode, needed_blocks);
@@ -1090,19 +1090,8 @@
 		goto out;
 	}
 	ret = block_prepare_write(page, from, to, ext3_get_block);
-	if (ret) {
-		if (ret != -ENOSPC || tried_commit)
-			goto prepare_write_failed;
-		/*
-		 * It could be that there _is_ free space, but it's all tied up
-		 * in uncommitted bitmaps.  So force a commit here, which makes
-		 * those blocks allocatable and try again.
-		 */
-		tried_commit = 1;
-		handle->h_sync = 1;
-		ext3_journal_stop(handle);
-		goto retry;
-	}
+	if (ret)
+		goto prepare_write_failed;
 
 	if (ext3_should_journal_data(inode)) {
 		ret = walk_page_buffers(handle, page_buffers(page),
@@ -1111,6 +1100,8 @@
 prepare_write_failed:
 	if (ret)
 		ext3_journal_stop(handle);
+	if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
+		goto retry;
 out:
 	return ret;
 }
diff -Nru a/fs/ext3/namei.c b/fs/ext3/namei.c
--- a/fs/ext3/namei.c	Sun May 16 17:36:00 2004
+++ b/fs/ext3/namei.c	Sun May 16 17:36:00 2004
@@ -1630,8 +1630,9 @@
 {
 	handle_t *handle; 
 	struct inode * inode;
-	int err;
+	int err, retries = 0;
 
+retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
 					2*EXT3_QUOTA_INIT_BLOCKS);
@@ -1650,6 +1651,8 @@
 		err = ext3_add_nondir(handle, dentry, inode);
 	}
 	ext3_journal_stop(handle);
+	if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
+		goto retry;
 	return err;
 }
 
@@ -1658,11 +1661,12 @@
 {
 	handle_t *handle;
 	struct inode *inode;
-	int err;
+	int err, retries = 0;
 
 	if (!new_valid_dev(rdev))
 		return -EINVAL;
 
+retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
 			 		EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
 					2*EXT3_QUOTA_INIT_BLOCKS);
@@ -1682,6 +1686,8 @@
 		err = ext3_add_nondir(handle, dentry, inode);
 	}
 	ext3_journal_stop(handle);
+	if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
+		goto retry;
 	return err;
 }
 
@@ -1691,11 +1697,12 @@
 	struct inode * inode;
 	struct buffer_head * dir_block;
 	struct ext3_dir_entry_2 * de;
-	int err;
+	int err, retries = 0;
 
 	if (dir->i_nlink >= EXT3_LINK_MAX)
 		return -EMLINK;
 
+retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
 					2*EXT3_QUOTA_INIT_BLOCKS);
@@ -1753,6 +1760,8 @@
 	d_instantiate(dentry, inode);
 out_stop:
 	ext3_journal_stop(handle);
+	if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
+		goto retry;
 	return err;
 }
 
@@ -2094,12 +2103,13 @@
 {
 	handle_t *handle;
 	struct inode * inode;
-	int l, err;
+	int l, err, retries = 0;
 
 	l = strlen(symname)+1;
 	if (l > dir->i_sb->s_blocksize)
 		return -ENAMETOOLONG;
 
+retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
 			 		EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
 					2*EXT3_QUOTA_INIT_BLOCKS);
@@ -2138,6 +2148,8 @@
 	err = ext3_add_nondir(handle, dentry, inode);
 out_stop:
 	ext3_journal_stop(handle);
+	if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
+		goto retry;
 	return err;
 }
 
@@ -2146,11 +2158,12 @@
 {
 	handle_t *handle;
 	struct inode *inode = old_dentry->d_inode;
-	int err;
+	int err, retries = 0;
 
 	if (inode->i_nlink >= EXT3_LINK_MAX)
 		return -EMLINK;
 
+retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS);
 	if (IS_ERR(handle))
@@ -2165,6 +2178,8 @@
 
 	err = ext3_add_nondir(handle, dentry, inode);
 	ext3_journal_stop(handle);
+	if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
+		goto retry;
 	return err;
 }
 
diff -Nru a/fs/ext3/xattr.c b/fs/ext3/xattr.c
--- a/fs/ext3/xattr.c	Sun May 16 17:36:00 2004
+++ b/fs/ext3/xattr.c	Sun May 16 17:36:00 2004
@@ -875,8 +875,9 @@
 	       const void *value, size_t value_len, int flags)
 {
 	handle_t *handle;
-	int error;
+	int error, retries = 0;
 
+retry:
 	handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS);
 	if (IS_ERR(handle)) {
 		error = PTR_ERR(handle);
@@ -886,6 +887,9 @@
 		error = ext3_xattr_set_handle(handle, inode, name_index, name,
 					      value, value_len, flags);
 		error2 = ext3_journal_stop(handle);
+		if (error == -ENOSPC && 
+		    ext3_should_retry_alloc(inode->i_sb, &retries))
+			goto retry;
 		if (error == 0)
 			error = error2;
 	}
diff -Nru a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
--- a/include/linux/ext3_fs.h	Sun May 16 17:36:00 2004
+++ b/include/linux/ext3_fs.h	Sun May 16 17:36:00 2004
@@ -689,6 +689,7 @@
 extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
 						    unsigned int block_group,
 						    struct buffer_head ** bh);
+extern int ext3_should_retry_alloc(struct super_block *sb, int *retries);
 
 /* dir.c */
 extern int ext3_check_dir_entry(const char *, struct inode *,

[-- Attachment #3: test-retry --]
[-- Type: text/plain, Size: 721 bytes --]

#!/bin/sh
#
#
TEST_DIR=/tmp
IMAGE=$TEST_DIR/retry.img
MNTPT=$TEST_DIR/retry.mnt
TEST_SRC=/usr/projects/e2fsprogs/e2fsprogs/build
MKE2FS_OPTS=""
IMAGE_SIZE=8192

umount $MNTPT
dd if=/dev/zero of=$IMAGE bs=4k count=$IMAGE_SIZE
mke2fs -j -F $MKE2FS_OPTS $IMAGE 

function test_log ()
{
	echo $*
	logger -p local4.notice $*
}

mkdir -p $MNTPT
mount -o loop -t ext3 $IMAGE $MNTPT
test_log Retry test: BEGIN
for i in `seq 1 3`
do
	test_log "Retry test: Loop $i"
	echo 2 > /proc/sys/fs/jbd-debug
	while ! mkdir -p $MNTPT/foo/bar
	do
		test_log "Retry test: mkdir failed"
		sleep 1
	done
	echo 0 > /proc/sys/fs/jbd-debug
	cp -r $TEST_SRC $MNTPT/foo/bar 2> /dev/null
	rm -rf $MNTPT/*
done
umount $MNTPT
test_log "Retry test: END"

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2004-05-17  2:13 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-05-17  2:12 [PATCH] Ext3: Retry allocation after transaction commit (v2) Theodore Ts'o

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox