From: Mark Fasheh <mark.fasheh@oracle.com>
To: Andrew Morton <akpm@osdl.org>, Linus Torvalds <torvalds@osdl.org>
Cc: ocfs2-devel@oss.oracle.com, linux-kernel@vger.kernel.org
Subject: [Ocfs2-devel] [git patches] ocfs2 fixes
Date: Mon, 10 Apr 2006 16:03:06 -0700 [thread overview]
Message-ID: <20060410230306.GC25194@ca-server1.us.oracle.com> (raw)
We're post-rc1, so this pull contains fixes only. The configfs ones are minor,
but they are small, trivial, and obvious - so I included those as well.
Please pull from 'upstream-linus' branch of
git://oss.oracle.com/home/sourcebo/git/ocfs2.git
to receive the following updates:
fs/Kconfig | 2 -
fs/configfs/dir.c | 2 -
fs/ocfs2/cluster/heartbeat.c | 40 ++++++++++++++++++-----
fs/ocfs2/dlm/userdlm.c | 74 ++++++++++++++++++++++++++++++++-----------
fs/ocfs2/file.c | 19 ++++++-----
5 files changed, 101 insertions(+), 36 deletions(-)
Adrian Bunk:
CONFIGFS_FS must depend on SYSFS
Eric Sesterhenn:
Bogus NULL pointer check in fs/configfs/dir.c
Mark Fasheh:
ocfs2: multi node truncate fix
ocfs2: remove an overly aggressive BUG() in dlmfs
ocfs2: catch an invalid ast case in dlmfs
ocfs2: Handle the DLM_CANCELGRANT case in user_unlock_ast()
ocfs2: test and set teardown flag early in user_dlm_destroy_lock()
ocfs2: Better I/O error handling in heartbeat
diff --git a/fs/Kconfig b/fs/Kconfig
index e207be6..97f3174 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -861,7 +861,7 @@ config RAMFS
config CONFIGFS_FS
tristate "Userspace-driven configuration filesystem (EXPERIMENTAL)"
- depends on EXPERIMENTAL
+ depends on SYSFS && EXPERIMENTAL
help
configfs is a ram-based filesystem that provides the converse
of sysfs's functionality. Where sysfs is a filesystem-based
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 8ed9b06..5638c8f 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -504,7 +504,7 @@ static int populate_groups(struct config
int ret = 0;
int i;
- if (group && group->default_groups) {
+ if (group->default_groups) {
/* FYI, we're faking mkdir here
* I'm not sure we need this semaphore, as we're called
* from our parent's mkdir. That holds our parent's
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index bff0f0d..21f38ac 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -153,6 +153,7 @@ struct o2hb_region {
struct o2hb_bio_wait_ctxt {
atomic_t wc_num_reqs;
struct completion wc_io_complete;
+ int wc_error;
};
static void o2hb_write_timeout(void *arg)
@@ -186,6 +187,7 @@ static inline void o2hb_bio_wait_init(st
{
atomic_set(&wc->wc_num_reqs, num_ios);
init_completion(&wc->wc_io_complete);
+ wc->wc_error = 0;
}
/* Used in error paths too */
@@ -218,8 +220,10 @@ static int o2hb_bio_end_io(struct bio *b
{
struct o2hb_bio_wait_ctxt *wc = bio->bi_private;
- if (error)
+ if (error) {
mlog(ML_ERROR, "IO Error %d\n", error);
+ wc->wc_error = error;
+ }
if (bio->bi_size)
return 1;
@@ -390,6 +394,8 @@ static int o2hb_read_slots(struct o2hb_r
bail_and_wait:
o2hb_wait_on_io(reg, &wc);
+ if (wc.wc_error && !status)
+ status = wc.wc_error;
if (bios) {
for(i = 0; i < num_bios; i++)
@@ -790,20 +796,24 @@ static int o2hb_highest_node(unsigned lo
return highest;
}
-static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
+static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
{
int i, ret, highest_node, change = 0;
unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)];
struct bio *write_bio;
struct o2hb_bio_wait_ctxt write_wc;
- if (o2nm_configured_node_map(configured_nodes, sizeof(configured_nodes)))
- return;
+ ret = o2nm_configured_node_map(configured_nodes,
+ sizeof(configured_nodes));
+ if (ret) {
+ mlog_errno(ret);
+ return ret;
+ }
highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES);
if (highest_node >= O2NM_MAX_NODES) {
mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n");
- return;
+ return -EINVAL;
}
/* No sense in reading the slots of nodes that don't exist
@@ -813,7 +823,7 @@ static void o2hb_do_disk_heartbeat(struc
ret = o2hb_read_slots(reg, highest_node + 1);
if (ret < 0) {
mlog_errno(ret);
- return;
+ return ret;
}
/* With an up to date view of the slots, we can check that no
@@ -831,7 +841,7 @@ static void o2hb_do_disk_heartbeat(struc
ret = o2hb_issue_node_write(reg, &write_bio, &write_wc);
if (ret < 0) {
mlog_errno(ret);
- return;
+ return ret;
}
i = -1;
@@ -847,6 +857,15 @@ static void o2hb_do_disk_heartbeat(struc
*/
o2hb_wait_on_io(reg, &write_wc);
bio_put(write_bio);
+ if (write_wc.wc_error) {
+ /* Do not re-arm the write timeout on I/O error - we
+ * can't be sure that the new block ever made it to
+ * disk */
+ mlog(ML_ERROR, "Write error %d on device \"%s\"\n",
+ write_wc.wc_error, reg->hr_dev_name);
+ return write_wc.wc_error;
+ }
+
o2hb_arm_write_timeout(reg);
/* let the person who launched us know when things are steady */
@@ -854,6 +873,8 @@ static void o2hb_do_disk_heartbeat(struc
if (atomic_dec_and_test(&reg->hr_steady_iterations))
wake_up(&o2hb_steady_queue);
}
+
+ return 0;
}
/* Subtract b from a, storing the result in a. a *must* have a larger
@@ -913,7 +934,10 @@ static int o2hb_thread(void *data)
* likely to time itself out. */
do_gettimeofday(&before_hb);
- o2hb_do_disk_heartbeat(reg);
+ i = 0;
+ do {
+ ret = o2hb_do_disk_heartbeat(reg);
+ } while (ret && ++i < 2);
do_gettimeofday(&after_hb);
elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb);
diff --git a/fs/ocfs2/dlm/userdlm.c b/fs/ocfs2/dlm/userdlm.c
index c3764f4..74ca4e5 100644
--- a/fs/ocfs2/dlm/userdlm.c
+++ b/fs/ocfs2/dlm/userdlm.c
@@ -139,6 +139,10 @@ static void user_ast(void *opaque)
return;
}
+ mlog_bug_on_msg(lockres->l_requested == LKM_IVMODE,
+ "Lockres %s, requested ivmode. flags 0x%x\n",
+ lockres->l_name, lockres->l_flags);
+
/* we're downconverting. */
if (lockres->l_requested < lockres->l_level) {
if (lockres->l_requested <=
@@ -229,23 +233,42 @@ static void user_unlock_ast(void *opaque
mlog(0, "UNLOCK AST called on lock %s\n", lockres->l_name);
- if (status != DLM_NORMAL)
+ if (status != DLM_NORMAL && status != DLM_CANCELGRANT)
mlog(ML_ERROR, "Dlm returns status %d\n", status);
spin_lock(&lockres->l_lock);
- if (lockres->l_flags & USER_LOCK_IN_TEARDOWN)
+ /* The teardown flag gets set early during the unlock process,
+ * so test the cancel flag to make sure that this ast isn't
+ * for a concurrent cancel. */
+ if (lockres->l_flags & USER_LOCK_IN_TEARDOWN
+ && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) {
lockres->l_level = LKM_IVMODE;
- else {
+ } else if (status == DLM_CANCELGRANT) {
+ mlog(0, "Lock %s, cancel fails, flags 0x%x\n",
+ lockres->l_name, lockres->l_flags);
+ /* We tried to cancel a convert request, but it was
+ * already granted. Don't clear the busy flag - the
+ * ast should've done this already. */
+ BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
+ lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
+ goto out_noclear;
+ } else {
+ BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
+ /* Cancel succeeded, we want to re-queue */
+ mlog(0, "Lock %s, cancel succeeds, flags 0x%x\n",
+ lockres->l_name, lockres->l_flags);
lockres->l_requested = LKM_IVMODE; /* cancel an
* upconvert
* request. */
lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
/* we want the unblock thread to look at it again
* now. */
- __user_dlm_queue_lockres(lockres);
+ if (lockres->l_flags & USER_LOCK_BLOCKED)
+ __user_dlm_queue_lockres(lockres);
}
lockres->l_flags &= ~USER_LOCK_BUSY;
+out_noclear:
spin_unlock(&lockres->l_lock);
wake_up(&lockres->l_event);
@@ -268,13 +291,26 @@ static void user_dlm_unblock_lock(void *
spin_lock(&lockres->l_lock);
- BUG_ON(!(lockres->l_flags & USER_LOCK_BLOCKED));
- BUG_ON(!(lockres->l_flags & USER_LOCK_QUEUED));
+ mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED),
+ "Lockres %s, flags 0x%x\n",
+ lockres->l_name, lockres->l_flags);
- /* notice that we don't clear USER_LOCK_BLOCKED here. That's
- * for user_ast to do. */
+ /* notice that we don't clear USER_LOCK_BLOCKED here. If it's
+ * set, we want user_ast clear it. */
lockres->l_flags &= ~USER_LOCK_QUEUED;
+ /* It's valid to get here and no longer be blocked - if we get
+ * several basts in a row, we might be queued by the first
+ * one, the unblock thread might run and clear the queued
+ * flag, and finally we might get another bast which re-queues
+ * us before our ast for the downconvert is called. */
+ if (!(lockres->l_flags & USER_LOCK_BLOCKED)) {
+ mlog(0, "Lockres %s, flags 0x%x: queued but not blocking\n",
+ lockres->l_name, lockres->l_flags);
+ spin_unlock(&lockres->l_lock);
+ goto drop_ref;
+ }
+
if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
mlog(0, "lock is in teardown so we do nothing\n");
spin_unlock(&lockres->l_lock);
@@ -282,7 +318,9 @@ static void user_dlm_unblock_lock(void *
}
if (lockres->l_flags & USER_LOCK_BUSY) {
- mlog(0, "BUSY flag detected...\n");
+ mlog(0, "Cancel lock %s, flags 0x%x\n",
+ lockres->l_name, lockres->l_flags);
+
if (lockres->l_flags & USER_LOCK_IN_CANCEL) {
spin_unlock(&lockres->l_lock);
goto drop_ref;
@@ -296,14 +334,7 @@ static void user_dlm_unblock_lock(void *
LKM_CANCEL,
user_unlock_ast,
lockres);
- if (status == DLM_CANCELGRANT) {
- /* If we got this, then the ast was fired
- * before we could cancel. We cleanup our
- * state, and restart the function. */
- spin_lock(&lockres->l_lock);
- lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
- spin_unlock(&lockres->l_lock);
- } else if (status != DLM_NORMAL)
+ if (status != DLM_NORMAL)
user_log_dlm_error("dlmunlock", status, lockres);
goto drop_ref;
}
@@ -581,6 +612,14 @@ int user_dlm_destroy_lock(struct user_lo
mlog(0, "asked to destroy %s\n", lockres->l_name);
spin_lock(&lockres->l_lock);
+ if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
+ mlog(0, "Lock is already torn down\n");
+ spin_unlock(&lockres->l_lock);
+ return 0;
+ }
+
+ lockres->l_flags |= USER_LOCK_IN_TEARDOWN;
+
while (lockres->l_flags & USER_LOCK_BUSY) {
spin_unlock(&lockres->l_lock);
@@ -606,7 +645,6 @@ int user_dlm_destroy_lock(struct user_lo
lockres->l_flags &= ~USER_LOCK_ATTACHED;
lockres->l_flags |= USER_LOCK_BUSY;
- lockres->l_flags |= USER_LOCK_IN_TEARDOWN;
spin_unlock(&lockres->l_lock);
mlog(0, "unlocking lockres %s\n", lockres->l_name);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 34e903a..581eb45 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -260,6 +260,17 @@ static int ocfs2_truncate_file(struct in
if (new_i_size == le64_to_cpu(fe->i_size))
goto bail;
+ /* This forces other nodes to sync and drop their pages. Do
+ * this even if we have a truncate without allocation change -
+ * ocfs2 cluster sizes can be much greater than page size, so
+ * we have to truncate them anyway. */
+ status = ocfs2_data_lock(inode, 1);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+ ocfs2_data_unlock(inode, 1);
+
if (le32_to_cpu(fe->i_clusters) ==
ocfs2_clusters_for_bytes(osb->sb, new_i_size)) {
mlog(0, "fe->i_clusters = %u, so we do a simple truncate\n",
@@ -272,14 +283,6 @@ static int ocfs2_truncate_file(struct in
goto bail;
}
- /* This forces other nodes to sync and drop their pages */
- status = ocfs2_data_lock(inode, 1);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
- ocfs2_data_unlock(inode, 1);
-
/* alright, we're going to need to do a full blown alloc size
* change. Orphan the inode so that recovery can complete the
* truncate if necessary. This does the task of marking
WARNING: multiple messages have this Message-ID (diff)
From: Mark Fasheh <mark.fasheh@oracle.com>
To: Andrew Morton <akpm@osdl.org>, Linus Torvalds <torvalds@osdl.org>
Cc: ocfs2-devel@oss.oracle.com, linux-kernel@vger.kernel.org
Subject: [git patches] ocfs2 fixes
Date: Mon, 10 Apr 2006 16:03:06 -0700 [thread overview]
Message-ID: <20060410230306.GC25194@ca-server1.us.oracle.com> (raw)
We're post-rc1, so this pull contains fixes only. The configfs ones are minor,
but they are small, trivial, and obvious - so I included those as well.
Please pull from 'upstream-linus' branch of
git://oss.oracle.com/home/sourcebo/git/ocfs2.git
to receive the following updates:
fs/Kconfig | 2 -
fs/configfs/dir.c | 2 -
fs/ocfs2/cluster/heartbeat.c | 40 ++++++++++++++++++-----
fs/ocfs2/dlm/userdlm.c | 74 ++++++++++++++++++++++++++++++++-----------
fs/ocfs2/file.c | 19 ++++++-----
5 files changed, 101 insertions(+), 36 deletions(-)
Adrian Bunk:
CONFIGFS_FS must depend on SYSFS
Eric Sesterhenn:
Bogus NULL pointer check in fs/configfs/dir.c
Mark Fasheh:
ocfs2: multi node truncate fix
ocfs2: remove an overly aggressive BUG() in dlmfs
ocfs2: catch an invalid ast case in dlmfs
ocfs2: Handle the DLM_CANCELGRANT case in user_unlock_ast()
ocfs2: test and set teardown flag early in user_dlm_destroy_lock()
ocfs2: Better I/O error handling in heartbeat
diff --git a/fs/Kconfig b/fs/Kconfig
index e207be6..97f3174 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -861,7 +861,7 @@ config RAMFS
config CONFIGFS_FS
tristate "Userspace-driven configuration filesystem (EXPERIMENTAL)"
- depends on EXPERIMENTAL
+ depends on SYSFS && EXPERIMENTAL
help
configfs is a ram-based filesystem that provides the converse
of sysfs's functionality. Where sysfs is a filesystem-based
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 8ed9b06..5638c8f 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -504,7 +504,7 @@ static int populate_groups(struct config
int ret = 0;
int i;
- if (group && group->default_groups) {
+ if (group->default_groups) {
/* FYI, we're faking mkdir here
* I'm not sure we need this semaphore, as we're called
* from our parent's mkdir. That holds our parent's
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index bff0f0d..21f38ac 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -153,6 +153,7 @@ struct o2hb_region {
struct o2hb_bio_wait_ctxt {
atomic_t wc_num_reqs;
struct completion wc_io_complete;
+ int wc_error;
};
static void o2hb_write_timeout(void *arg)
@@ -186,6 +187,7 @@ static inline void o2hb_bio_wait_init(st
{
atomic_set(&wc->wc_num_reqs, num_ios);
init_completion(&wc->wc_io_complete);
+ wc->wc_error = 0;
}
/* Used in error paths too */
@@ -218,8 +220,10 @@ static int o2hb_bio_end_io(struct bio *b
{
struct o2hb_bio_wait_ctxt *wc = bio->bi_private;
- if (error)
+ if (error) {
mlog(ML_ERROR, "IO Error %d\n", error);
+ wc->wc_error = error;
+ }
if (bio->bi_size)
return 1;
@@ -390,6 +394,8 @@ static int o2hb_read_slots(struct o2hb_r
bail_and_wait:
o2hb_wait_on_io(reg, &wc);
+ if (wc.wc_error && !status)
+ status = wc.wc_error;
if (bios) {
for(i = 0; i < num_bios; i++)
@@ -790,20 +796,24 @@ static int o2hb_highest_node(unsigned lo
return highest;
}
-static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
+static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
{
int i, ret, highest_node, change = 0;
unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)];
struct bio *write_bio;
struct o2hb_bio_wait_ctxt write_wc;
- if (o2nm_configured_node_map(configured_nodes, sizeof(configured_nodes)))
- return;
+ ret = o2nm_configured_node_map(configured_nodes,
+ sizeof(configured_nodes));
+ if (ret) {
+ mlog_errno(ret);
+ return ret;
+ }
highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES);
if (highest_node >= O2NM_MAX_NODES) {
mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n");
- return;
+ return -EINVAL;
}
/* No sense in reading the slots of nodes that don't exist
@@ -813,7 +823,7 @@ static void o2hb_do_disk_heartbeat(struc
ret = o2hb_read_slots(reg, highest_node + 1);
if (ret < 0) {
mlog_errno(ret);
- return;
+ return ret;
}
/* With an up to date view of the slots, we can check that no
@@ -831,7 +841,7 @@ static void o2hb_do_disk_heartbeat(struc
ret = o2hb_issue_node_write(reg, &write_bio, &write_wc);
if (ret < 0) {
mlog_errno(ret);
- return;
+ return ret;
}
i = -1;
@@ -847,6 +857,15 @@ static void o2hb_do_disk_heartbeat(struc
*/
o2hb_wait_on_io(reg, &write_wc);
bio_put(write_bio);
+ if (write_wc.wc_error) {
+ /* Do not re-arm the write timeout on I/O error - we
+ * can't be sure that the new block ever made it to
+ * disk */
+ mlog(ML_ERROR, "Write error %d on device \"%s\"\n",
+ write_wc.wc_error, reg->hr_dev_name);
+ return write_wc.wc_error;
+ }
+
o2hb_arm_write_timeout(reg);
/* let the person who launched us know when things are steady */
@@ -854,6 +873,8 @@ static void o2hb_do_disk_heartbeat(struc
if (atomic_dec_and_test(&reg->hr_steady_iterations))
wake_up(&o2hb_steady_queue);
}
+
+ return 0;
}
/* Subtract b from a, storing the result in a. a *must* have a larger
@@ -913,7 +934,10 @@ static int o2hb_thread(void *data)
* likely to time itself out. */
do_gettimeofday(&before_hb);
- o2hb_do_disk_heartbeat(reg);
+ i = 0;
+ do {
+ ret = o2hb_do_disk_heartbeat(reg);
+ } while (ret && ++i < 2);
do_gettimeofday(&after_hb);
elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb);
diff --git a/fs/ocfs2/dlm/userdlm.c b/fs/ocfs2/dlm/userdlm.c
index c3764f4..74ca4e5 100644
--- a/fs/ocfs2/dlm/userdlm.c
+++ b/fs/ocfs2/dlm/userdlm.c
@@ -139,6 +139,10 @@ static void user_ast(void *opaque)
return;
}
+ mlog_bug_on_msg(lockres->l_requested == LKM_IVMODE,
+ "Lockres %s, requested ivmode. flags 0x%x\n",
+ lockres->l_name, lockres->l_flags);
+
/* we're downconverting. */
if (lockres->l_requested < lockres->l_level) {
if (lockres->l_requested <=
@@ -229,23 +233,42 @@ static void user_unlock_ast(void *opaque
mlog(0, "UNLOCK AST called on lock %s\n", lockres->l_name);
- if (status != DLM_NORMAL)
+ if (status != DLM_NORMAL && status != DLM_CANCELGRANT)
mlog(ML_ERROR, "Dlm returns status %d\n", status);
spin_lock(&lockres->l_lock);
- if (lockres->l_flags & USER_LOCK_IN_TEARDOWN)
+ /* The teardown flag gets set early during the unlock process,
+ * so test the cancel flag to make sure that this ast isn't
+ * for a concurrent cancel. */
+ if (lockres->l_flags & USER_LOCK_IN_TEARDOWN
+ && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) {
lockres->l_level = LKM_IVMODE;
- else {
+ } else if (status == DLM_CANCELGRANT) {
+ mlog(0, "Lock %s, cancel fails, flags 0x%x\n",
+ lockres->l_name, lockres->l_flags);
+ /* We tried to cancel a convert request, but it was
+ * already granted. Don't clear the busy flag - the
+ * ast should've done this already. */
+ BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
+ lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
+ goto out_noclear;
+ } else {
+ BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
+ /* Cancel succeeded, we want to re-queue */
+ mlog(0, "Lock %s, cancel succeeds, flags 0x%x\n",
+ lockres->l_name, lockres->l_flags);
lockres->l_requested = LKM_IVMODE; /* cancel an
* upconvert
* request. */
lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
/* we want the unblock thread to look at it again
* now. */
- __user_dlm_queue_lockres(lockres);
+ if (lockres->l_flags & USER_LOCK_BLOCKED)
+ __user_dlm_queue_lockres(lockres);
}
lockres->l_flags &= ~USER_LOCK_BUSY;
+out_noclear:
spin_unlock(&lockres->l_lock);
wake_up(&lockres->l_event);
@@ -268,13 +291,26 @@ static void user_dlm_unblock_lock(void *
spin_lock(&lockres->l_lock);
- BUG_ON(!(lockres->l_flags & USER_LOCK_BLOCKED));
- BUG_ON(!(lockres->l_flags & USER_LOCK_QUEUED));
+ mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED),
+ "Lockres %s, flags 0x%x\n",
+ lockres->l_name, lockres->l_flags);
- /* notice that we don't clear USER_LOCK_BLOCKED here. That's
- * for user_ast to do. */
+ /* notice that we don't clear USER_LOCK_BLOCKED here. If it's
+ * set, we want user_ast clear it. */
lockres->l_flags &= ~USER_LOCK_QUEUED;
+ /* It's valid to get here and no longer be blocked - if we get
+ * several basts in a row, we might be queued by the first
+ * one, the unblock thread might run and clear the queued
+ * flag, and finally we might get another bast which re-queues
+ * us before our ast for the downconvert is called. */
+ if (!(lockres->l_flags & USER_LOCK_BLOCKED)) {
+ mlog(0, "Lockres %s, flags 0x%x: queued but not blocking\n",
+ lockres->l_name, lockres->l_flags);
+ spin_unlock(&lockres->l_lock);
+ goto drop_ref;
+ }
+
if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
mlog(0, "lock is in teardown so we do nothing\n");
spin_unlock(&lockres->l_lock);
@@ -282,7 +318,9 @@ static void user_dlm_unblock_lock(void *
}
if (lockres->l_flags & USER_LOCK_BUSY) {
- mlog(0, "BUSY flag detected...\n");
+ mlog(0, "Cancel lock %s, flags 0x%x\n",
+ lockres->l_name, lockres->l_flags);
+
if (lockres->l_flags & USER_LOCK_IN_CANCEL) {
spin_unlock(&lockres->l_lock);
goto drop_ref;
@@ -296,14 +334,7 @@ static void user_dlm_unblock_lock(void *
LKM_CANCEL,
user_unlock_ast,
lockres);
- if (status == DLM_CANCELGRANT) {
- /* If we got this, then the ast was fired
- * before we could cancel. We cleanup our
- * state, and restart the function. */
- spin_lock(&lockres->l_lock);
- lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
- spin_unlock(&lockres->l_lock);
- } else if (status != DLM_NORMAL)
+ if (status != DLM_NORMAL)
user_log_dlm_error("dlmunlock", status, lockres);
goto drop_ref;
}
@@ -581,6 +612,14 @@ int user_dlm_destroy_lock(struct user_lo
mlog(0, "asked to destroy %s\n", lockres->l_name);
spin_lock(&lockres->l_lock);
+ if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
+ mlog(0, "Lock is already torn down\n");
+ spin_unlock(&lockres->l_lock);
+ return 0;
+ }
+
+ lockres->l_flags |= USER_LOCK_IN_TEARDOWN;
+
while (lockres->l_flags & USER_LOCK_BUSY) {
spin_unlock(&lockres->l_lock);
@@ -606,7 +645,6 @@ int user_dlm_destroy_lock(struct user_lo
lockres->l_flags &= ~USER_LOCK_ATTACHED;
lockres->l_flags |= USER_LOCK_BUSY;
- lockres->l_flags |= USER_LOCK_IN_TEARDOWN;
spin_unlock(&lockres->l_lock);
mlog(0, "unlocking lockres %s\n", lockres->l_name);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 34e903a..581eb45 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -260,6 +260,17 @@ static int ocfs2_truncate_file(struct in
if (new_i_size == le64_to_cpu(fe->i_size))
goto bail;
+ /* This forces other nodes to sync and drop their pages. Do
+ * this even if we have a truncate without allocation change -
+ * ocfs2 cluster sizes can be much greater than page size, so
+ * we have to truncate them anyway. */
+ status = ocfs2_data_lock(inode, 1);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+ ocfs2_data_unlock(inode, 1);
+
if (le32_to_cpu(fe->i_clusters) ==
ocfs2_clusters_for_bytes(osb->sb, new_i_size)) {
mlog(0, "fe->i_clusters = %u, so we do a simple truncate\n",
@@ -272,14 +283,6 @@ static int ocfs2_truncate_file(struct in
goto bail;
}
- /* This forces other nodes to sync and drop their pages */
- status = ocfs2_data_lock(inode, 1);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
- ocfs2_data_unlock(inode, 1);
-
/* alright, we're going to need to do a full blown alloc size
* change. Orphan the inode so that recovery can complete the
* truncate if necessary. This does the task of marking
next reply other threads:[~2006-04-10 23:03 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-04-10 23:03 Mark Fasheh [this message]
2006-04-10 23:03 ` [git patches] ocfs2 fixes Mark Fasheh
-- strict thread matches above, loose matches on Subject: below --
2006-05-12 0:21 [Ocfs2-devel] " Mark Fasheh
2006-05-17 22:21 Mark Fasheh
2006-06-19 18:46 Mark Fasheh
2006-06-21 18:43 Mark Fasheh
2006-06-21 19:00 ` Brian Long
2006-06-21 19:17 ` Sunil Mushran
2006-12-29 15:18 Mark Fasheh
2007-01-22 11:02 Mark Fasheh
2007-01-29 12:05 Mark Fasheh
2007-01-29 13:19 ` Mark Fasheh
2007-01-29 21:19 ` Mark Fasheh
2007-02-02 9:28 Mark Fasheh
2007-03-28 13:29 Mark Fasheh
2007-05-03 11:57 Mark Fasheh
2007-06-08 10:48 Mark Fasheh
2007-09-11 15:32 Mark Fasheh
2007-09-21 9:36 Mark Fasheh
2007-11-07 9:41 Mark Fasheh
2007-11-27 17:16 Mark Fasheh
2007-12-17 11:00 Mark Fasheh
2008-03-04 10:47 Mark Fasheh
2008-03-10 15:46 Mark Fasheh
2008-05-02 18:35 [Ocfs2-devel] [git patches] Ocfs2 fixes Mark Fasheh
2008-06-01 16:16 Mark Fasheh
2008-06-16 20:12 Mark Fasheh
2008-11-04 18:32 Mark Fasheh
2008-12-01 22:54 Mark Fasheh
2008-12-17 19:34 Mark Fasheh
2009-03-13 17:14 Mark Fasheh
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20060410230306.GC25194@ca-server1.us.oracle.com \
--to=mark.fasheh@oracle.com \
--cc=akpm@osdl.org \
--cc=linux-kernel@vger.kernel.org \
--cc=ocfs2-devel@oss.oracle.com \
--cc=torvalds@osdl.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.