All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Ext3: Retry allocation after transaction commit (v2)
@ 2004-05-17  2:12 Theodore Ts'o
  0 siblings, 0 replies; only message in thread
From: Theodore Ts'o @ 2004-05-17  2:12 UTC (permalink / raw)
  To: Andrew Morton; +Cc: adilger, linux-kernel, ext2-devel

[-- Attachment #1: Type: text/plain, Size: 928 bytes --]

Here is a reworked version of my patch to ext3 to retry certain
filesystem operations after an ENOSPC error.  The
ext3_should_retry_alloc() function will not wait on the currently
running transaction if there is a currently active handle; hence this
should avoid deadlocks in the Lustre use case.  The patch is versus
BK-recent.

I've also included a simple, reliable test case which demonstrates the
problem this patch is intended to fix.  (Note that BK-recent is not
sufficient to address this test case, and waiting on the commiting
transaction in ext3_new_block is also not sufficient.  Been there,
tried that, didn't work.  We need to do the full-bore retry from the
top level.  The ext3_should_retry_alloc() will only wait on the
committing transaction if there is an active handle; hence Lustre will
probably also need to use ext3_should_retry_alloc() if it wants to
reliably avoid this particular problem.)

						- Ted

[-- Attachment #2: retry.patch --]
[-- Type: text/plain, Size: 10384 bytes --]

# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
#   2004/05/16 17:29:49-04:00 tytso@think.thunk.org 
#   Adds retry logic to the ext3 filesystem in the case where forcing 
#   the current transaction out to disk may cause the filesystem operation
#   to succeed.
# 
# include/linux/ext3_fs.h
#   2004/05/16 12:39:52-04:00 tytso@think.thunk.org +1 -0
#   Add function prototype for ext3_should_retry_alloc()
# 
# fs/ext3/xattr.c
#   2004/05/16 12:39:52-04:00 tytso@think.thunk.org +5 -1
#   Add retry logic in the case of ENOSPC.
# 
# fs/ext3/namei.c
#   2004/05/16 12:39:52-04:00 tytso@think.thunk.org +20 -5
#   Add retry logic in the case of ENOSPC.
# 
# fs/ext3/inode.c
#   2004/05/16 12:39:52-04:00 tytso@think.thunk.org +5 -14
#   Add retry logic in the case of ENOSPC.
# 
# fs/ext3/balloc.c
#   2004/05/16 12:39:52-04:00 tytso@think.thunk.org +56 -6
#   Factor out the check for free blocks into ext3_has_free_blocks().
#   
#   New function, ext3_should_retry_alloc() which centralizes logic for
#   determining whether or not the top-level filesystem code should 
#   retry the operation.  This function also will wait until the 
#   current or commiting transaction is completed in the case where
#   retrying is indicated.
# 
# fs/ext3/acl.c
#   2004/05/16 12:39:52-04:00 tytso@think.thunk.org +9 -1
#   Add retry logic in the case of ENOSPC.
# 
diff -Nru a/fs/ext3/acl.c b/fs/ext3/acl.c
--- a/fs/ext3/acl.c	Sun May 16 17:36:00 2004
+++ b/fs/ext3/acl.c	Sun May 16 17:36:00 2004
@@ -428,7 +428,9 @@
 	error = posix_acl_chmod_masq(clone, inode->i_mode);
 	if (!error) {
 		handle_t *handle;
+		int retries = 0;
 
+	retry:
 		handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS);
 		if (IS_ERR(handle)) {
 			error = PTR_ERR(handle);
@@ -437,6 +439,9 @@
 		}
 		error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, clone);
 		ext3_journal_stop(handle);
+		if (error == -ENOSPC && 
+		    ext3_should_retry_alloc(inode->i_sb, &retries))
+			goto retry;
 	}
 out:
 	posix_acl_release(clone);
@@ -516,7 +521,7 @@
 {
 	handle_t *handle;
 	struct posix_acl *acl;
-	int error;
+	int error, retries = 0;
 
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return -EOPNOTSUPP;
@@ -535,11 +540,14 @@
 	} else
 		acl = NULL;
 
+retry:
 	handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS);
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	error = ext3_set_acl(handle, inode, type, acl);
 	ext3_journal_stop(handle);
+	if (error == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
+		goto retry;
 
 release_and_out:
 	posix_acl_release(acl);
diff -Nru a/fs/ext3/balloc.c b/fs/ext3/balloc.c
--- a/fs/ext3/balloc.c	Sun May 16 17:36:00 2004
+++ b/fs/ext3/balloc.c	Sun May 16 17:36:00 2004
@@ -465,6 +465,60 @@
 	return -1;
 }
 
+static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
+{
+	int free_blocks, root_blocks;
+
+	free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
+	root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
+	if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
+		sbi->s_resuid != current->fsuid &&
+		(sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Ext3_should_retry_alloc is called when ENOSPC is returned, and if
+ * it is profitable to retry the operation, this function will wait
+ * for the current or commiting transaction to complete, and then
+ * return TRUE.
+ */
+int ext3_should_retry_alloc(struct super_block *sb, int *retries)
+{
+	transaction_t *transaction = NULL;
+	journal_t *journal = EXT3_SB(sb)->s_journal;
+	tid_t tid;
+
+	if (!ext3_has_free_blocks(EXT3_SB(sb)) || (*retries)++ > 3)
+		return 0;
+
+	jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
+
+	/* 
+	 * We can only force the running transaction if we don't have
+	 * an active handle; otherwise, we will deadlock.
+	 */
+	spin_lock(&journal->j_state_lock);
+	if (journal->j_running_transaction && !current->journal_info) {
+		transaction = journal->j_running_transaction;
+		__log_start_commit(journal, transaction->t_tid);
+	} else if (journal->j_committing_transaction)
+		transaction = journal->j_committing_transaction;
+
+	if (!transaction) {
+		spin_unlock(&journal->j_state_lock);
+		return 0;	/* Nothing to retry */
+	}
+		
+	tid = transaction->t_tid;
+	spin_unlock(&journal->j_state_lock);
+	log_wait_commit(journal, tid);
+
+	return 1;
+}
+
 /*
  * ext3_new_block uses a goal block to assist allocation.  If the goal is
  * free, or there is a free block within 32 blocks of the goal, that block
@@ -485,7 +539,7 @@
 	int target_block;			/* tmp */
 	int fatal = 0, err;
 	int performed_allocation = 0;
-	int free_blocks, root_blocks;
+	int free_blocks;
 	struct super_block *sb;
 	struct ext3_group_desc *gdp;
 	struct ext3_super_block *es;
@@ -512,11 +566,7 @@
 	es = EXT3_SB(sb)->s_es;
 	ext3_debug("goal=%lu.\n", goal);
 
-	free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
-	root_blocks = le32_to_cpu(es->s_r_blocks_count);
-	if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
-		sbi->s_resuid != current->fsuid &&
-		(sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
+	if (!ext3_has_free_blocks(sbi)) {
 		*errp = -ENOSPC;
 		goto out;
 	}
diff -Nru a/fs/ext3/inode.c b/fs/ext3/inode.c
--- a/fs/ext3/inode.c	Sun May 16 17:36:00 2004
+++ b/fs/ext3/inode.c	Sun May 16 17:36:00 2004
@@ -1081,7 +1081,7 @@
 	struct inode *inode = page->mapping->host;
 	int ret, needed_blocks = ext3_writepage_trans_blocks(inode);
 	handle_t *handle;
-	int tried_commit = 0;
+	int retries = 0;
 
 retry:
 	handle = ext3_journal_start(inode, needed_blocks);
@@ -1090,19 +1090,8 @@
 		goto out;
 	}
 	ret = block_prepare_write(page, from, to, ext3_get_block);
-	if (ret) {
-		if (ret != -ENOSPC || tried_commit)
-			goto prepare_write_failed;
-		/*
-		 * It could be that there _is_ free space, but it's all tied up
-		 * in uncommitted bitmaps.  So force a commit here, which makes
-		 * those blocks allocatable and try again.
-		 */
-		tried_commit = 1;
-		handle->h_sync = 1;
-		ext3_journal_stop(handle);
-		goto retry;
-	}
+	if (ret)
+		goto prepare_write_failed;
 
 	if (ext3_should_journal_data(inode)) {
 		ret = walk_page_buffers(handle, page_buffers(page),
@@ -1111,6 +1100,8 @@
 prepare_write_failed:
 	if (ret)
 		ext3_journal_stop(handle);
+	if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
+		goto retry;
 out:
 	return ret;
 }
diff -Nru a/fs/ext3/namei.c b/fs/ext3/namei.c
--- a/fs/ext3/namei.c	Sun May 16 17:36:00 2004
+++ b/fs/ext3/namei.c	Sun May 16 17:36:00 2004
@@ -1630,8 +1630,9 @@
 {
 	handle_t *handle; 
 	struct inode * inode;
-	int err;
+	int err, retries = 0;
 
+retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
 					2*EXT3_QUOTA_INIT_BLOCKS);
@@ -1650,6 +1651,8 @@
 		err = ext3_add_nondir(handle, dentry, inode);
 	}
 	ext3_journal_stop(handle);
+	if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
+		goto retry;
 	return err;
 }
 
@@ -1658,11 +1661,12 @@
 {
 	handle_t *handle;
 	struct inode *inode;
-	int err;
+	int err, retries = 0;
 
 	if (!new_valid_dev(rdev))
 		return -EINVAL;
 
+retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
 			 		EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
 					2*EXT3_QUOTA_INIT_BLOCKS);
@@ -1682,6 +1686,8 @@
 		err = ext3_add_nondir(handle, dentry, inode);
 	}
 	ext3_journal_stop(handle);
+	if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
+		goto retry;
 	return err;
 }
 
@@ -1691,11 +1697,12 @@
 	struct inode * inode;
 	struct buffer_head * dir_block;
 	struct ext3_dir_entry_2 * de;
-	int err;
+	int err, retries = 0;
 
 	if (dir->i_nlink >= EXT3_LINK_MAX)
 		return -EMLINK;
 
+retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
 					2*EXT3_QUOTA_INIT_BLOCKS);
@@ -1753,6 +1760,8 @@
 	d_instantiate(dentry, inode);
 out_stop:
 	ext3_journal_stop(handle);
+	if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
+		goto retry;
 	return err;
 }
 
@@ -2094,12 +2103,13 @@
 {
 	handle_t *handle;
 	struct inode * inode;
-	int l, err;
+	int l, err, retries = 0;
 
 	l = strlen(symname)+1;
 	if (l > dir->i_sb->s_blocksize)
 		return -ENAMETOOLONG;
 
+retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
 			 		EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
 					2*EXT3_QUOTA_INIT_BLOCKS);
@@ -2138,6 +2148,8 @@
 	err = ext3_add_nondir(handle, dentry, inode);
 out_stop:
 	ext3_journal_stop(handle);
+	if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
+		goto retry;
 	return err;
 }
 
@@ -2146,11 +2158,12 @@
 {
 	handle_t *handle;
 	struct inode *inode = old_dentry->d_inode;
-	int err;
+	int err, retries = 0;
 
 	if (inode->i_nlink >= EXT3_LINK_MAX)
 		return -EMLINK;
 
+retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS);
 	if (IS_ERR(handle))
@@ -2165,6 +2178,8 @@
 
 	err = ext3_add_nondir(handle, dentry, inode);
 	ext3_journal_stop(handle);
+	if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
+		goto retry;
 	return err;
 }
 
diff -Nru a/fs/ext3/xattr.c b/fs/ext3/xattr.c
--- a/fs/ext3/xattr.c	Sun May 16 17:36:00 2004
+++ b/fs/ext3/xattr.c	Sun May 16 17:36:00 2004
@@ -875,8 +875,9 @@
 	       const void *value, size_t value_len, int flags)
 {
 	handle_t *handle;
-	int error;
+	int error, retries = 0;
 
+retry:
 	handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS);
 	if (IS_ERR(handle)) {
 		error = PTR_ERR(handle);
@@ -886,6 +887,9 @@
 		error = ext3_xattr_set_handle(handle, inode, name_index, name,
 					      value, value_len, flags);
 		error2 = ext3_journal_stop(handle);
+		if (error == -ENOSPC && 
+		    ext3_should_retry_alloc(inode->i_sb, &retries))
+			goto retry;
 		if (error == 0)
 			error = error2;
 	}
diff -Nru a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
--- a/include/linux/ext3_fs.h	Sun May 16 17:36:00 2004
+++ b/include/linux/ext3_fs.h	Sun May 16 17:36:00 2004
@@ -689,6 +689,7 @@
 extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
 						    unsigned int block_group,
 						    struct buffer_head ** bh);
+extern int ext3_should_retry_alloc(struct super_block *sb, int *retries);
 
 /* dir.c */
 extern int ext3_check_dir_entry(const char *, struct inode *,

[-- Attachment #3: test-retry --]
[-- Type: text/plain, Size: 721 bytes --]

#!/bin/sh
#
#
TEST_DIR=/tmp
IMAGE=$TEST_DIR/retry.img
MNTPT=$TEST_DIR/retry.mnt
TEST_SRC=/usr/projects/e2fsprogs/e2fsprogs/build
MKE2FS_OPTS=""
IMAGE_SIZE=8192

umount $MNTPT
dd if=/dev/zero of=$IMAGE bs=4k count=$IMAGE_SIZE
mke2fs -j -F $MKE2FS_OPTS $IMAGE 

function test_log ()
{
	echo $*
	logger -p local4.notice $*
}

mkdir -p $MNTPT
mount -o loop -t ext3 $IMAGE $MNTPT
test_log Retry test: BEGIN
for i in `seq 1 3`
do
	test_log "Retry test: Loop $i"
	echo 2 > /proc/sys/fs/jbd-debug
	while ! mkdir -p $MNTPT/foo/bar
	do
		test_log "Retry test: mkdir failed"
		sleep 1
	done
	echo 0 > /proc/sys/fs/jbd-debug
	cp -r $TEST_SRC $MNTPT/foo/bar 2> /dev/null
	rm -rf $MNTPT/*
done
umount $MNTPT
test_log "Retry test: END"

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2004-05-17  2:13 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-05-17  2:12 [PATCH] Ext3: Retry allocation after transaction commit (v2) Theodore Ts'o

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.