From: teigland@sourceware.org <teigland@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/group/gfs_controld lock_dlm.h plock.c ...
Date: 18 Aug 2006 16:33:10 -0000 [thread overview]
Message-ID: <20060818163310.9752.qmail@sourceware.org> (raw)
CVSROOT: /cvs/cluster
Module name: cluster
Changes by: teigland at sourceware.org 2006-08-18 16:33:08
Modified files:
group/gfs_controld: lock_dlm.h plock.c recover.c
Log message:
when the low nodeid fails, the checkpoint needs to be unlinked,
otherwise creating the ckpt will fail down the road when another
node mounts
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/lock_dlm.h.diff?cvsroot=cluster&r1=1.14&r2=1.15
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/plock.c.diff?cvsroot=cluster&r1=1.17&r2=1.18
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/recover.c.diff?cvsroot=cluster&r1=1.13&r2=1.14
--- cluster/group/gfs_controld/lock_dlm.h 2006/08/14 17:22:53 1.14
+++ cluster/group/gfs_controld/lock_dlm.h 2006/08/18 16:33:08 1.15
@@ -276,5 +276,6 @@
int dump_plocks(char *name, int fd);
void process_saved_plocks(struct mountgroup *mg);
void purge_plocks(struct mountgroup *mg, int nodeid, int unmount);
+int unlink_checkpoint(struct mountgroup *mg);
#endif
--- cluster/group/gfs_controld/plock.c 2006/08/17 19:39:17 1.17
+++ cluster/group/gfs_controld/plock.c 2006/08/18 16:33:08 1.18
@@ -1034,7 +1034,7 @@
return 0;
}
-int unlink_checkpoint(struct mountgroup *mg, SaNameT *name)
+int _unlink_checkpoint(struct mountgroup *mg, SaNameT *name)
{
SaCkptCheckpointHandleT h;
SaCkptCheckpointDescriptorT s;
@@ -1097,6 +1097,16 @@
return ret;
}
+int unlink_checkpoint(struct mountgroup *mg)
+{
+ SaNameT name;
+ int len;
+
+ len = snprintf(name.value, SA_MAX_NAME_LENGTH, "gfsplock.%s", mg->name);
+ name.length = len;
+ return _unlink_checkpoint(mg, &name);
+}
+
/* Copy all plock state into a checkpoint so new node can retrieve it. The
node creating the ckpt for the mounter needs to be the same node that's
sending the mounter its journals message (i.e. the low nodeid). The new
@@ -1139,7 +1149,7 @@
/* unlink an old checkpoint before we create a new one */
if (mg->cp_handle) {
- if (unlink_checkpoint(mg, &name))
+ if (_unlink_checkpoint(mg, &name))
return;
}
@@ -1231,7 +1241,7 @@
/* this shouldn't happen in general */
log_group(mg, "store_plocks: clearing old ckpt");
saCkptCheckpointClose(h);
- unlink_checkpoint(mg, &name);
+ _unlink_checkpoint(mg, &name);
goto open_retry;
}
if (rv != SA_AIS_OK) {
@@ -1318,6 +1328,9 @@
goto out_it;
}
+ if (!desc.sectionSize)
+ continue;
+
iov.sectionId = desc.sectionId;
iov.dataBuffer = &section_buf;
iov.dataSize = desc.sectionSize;
@@ -1362,7 +1375,7 @@
out:
if (mg->low_nodeid == our_nodeid) {
log_group(mg, "retrieve_plocks: unlink ckpt from old low node");
- unlink_checkpoint(mg, &name);
+ _unlink_checkpoint(mg, &name);
} else
saCkptCheckpointClose(h);
}
@@ -1372,8 +1385,7 @@
struct posix_lock *po, *po2;
struct lock_waiter *w, *w2;
struct resource *r, *r2;
- int len, purged = 0;
- SaNameT name;
+ int purged = 0;
list_for_each_entry_safe(r, r2, &mg->resources, list) {
list_for_each_entry_safe(po, po2, &r->locks, list) {
@@ -1408,12 +1420,8 @@
we need to unlink it so another node can create a new ckpt for
the next mounter after we leave */
- if (unmount && mg->cp_handle) {
- len = snprintf(name.value, SA_MAX_NAME_LENGTH,
- "gfsplock.%s", mg->name);
- name.length = len;
- unlink_checkpoint(mg, &name);
- }
+ if (unmount && mg->cp_handle)
+ unlink_checkpoint(mg);
}
int dump_plocks(char *name, int fd)
--- cluster/group/gfs_controld/recover.c 2006/08/15 21:38:00 1.13
+++ cluster/group/gfs_controld/recover.c 2006/08/18 16:33:08 1.14
@@ -917,7 +917,7 @@
int *nodeids, int *pos_out, int *neg_out)
{
struct mg_member *memb, *safe;
- int i, found, id, pos = 0, neg = 0, low = -1;
+ int i, found, id, pos = 0, neg = 0, low = -1, old_low_finished_nodeid;
/* move departed nodes from members list to members_gone */
@@ -990,6 +990,7 @@
if (low == -1 || memb->nodeid < low)
low = memb->nodeid;
}
+ old_low_finished_nodeid = mg->low_finished_nodeid;
mg->low_finished_nodeid = low;
*pos_out = pos;
@@ -997,6 +998,15 @@
log_group(mg, "total members %d low_finished_nodeid %d",
mg->memb_count, low);
+
+ /* the low nodeid failed and we're the new low nodeid, we need
+ to unlink the ckpt that the failed node had open so new ckpts
+ can be created down the road */
+ if ((old_low_finished_nodeid != low) && (our_nodeid == low)) {
+ log_group(mg, "unlink ckpt for failed low node %d",
+ old_low_finished_nodeid);
+ unlink_checkpoint(mg);
+ }
}
struct mountgroup *create_mg(char *name)
next reply other threads:[~2006-08-18 16:33 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-08-18 16:33 teigland [this message]
-- strict thread matches above, loose matches on Subject: below --
2006-08-21 17:46 [Cluster-devel] cluster/group/gfs_controld lock_dlm.h plock.c teigland
2006-08-08 21:19 teigland
2006-08-07 16:57 teigland
2006-08-04 21:56 teigland
2006-08-02 18:27 teigland
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20060818163310.9752.qmail@sourceware.org \
--to=teigland@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).