From: teigland@sourceware.org <teigland@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/group/gfs_controld Makefile cpg.c grou ...
Date: 15 Jun 2006 20:41:47 -0000 [thread overview]
Message-ID: <20060615204147.14251.qmail@sourceware.org> (raw)
CVSROOT: /cvs/cluster
Module name: cluster
Changes by: teigland at sourceware.org 2006-06-15 20:41:46
Modified files:
group/gfs_controld: Makefile cpg.c group.c lock_dlm.h main.c
member_cman.c recover.c
Log message:
Complete the code to support withdraw; this is not yet tested. This also
switches from using dlm locks for withdraw notifications to simply
using messages. The way the daemon now works allows a much simpler
approach to withdraw than the previous one, which needed the
dlm locks. Setting up a dlm lockspace for the daemon was also an
annoyingly heavy-weight step, and the dlm kernel state of the daemon
made cleaning up from crashes difficult.
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/Makefile.diff?cvsroot=cluster&r1=1.1&r2=1.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/cpg.c.diff?cvsroot=cluster&r1=1.2&r2=1.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/group.c.diff?cvsroot=cluster&r1=1.1&r2=1.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/lock_dlm.h.diff?cvsroot=cluster&r1=1.2&r2=1.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/main.c.diff?cvsroot=cluster&r1=1.2&r2=1.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/member_cman.c.diff?cvsroot=cluster&r1=1.1&r2=1.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/recover.c.diff?cvsroot=cluster&r1=1.2&r2=1.3
--- cluster/group/gfs_controld/Makefile 2006/06/09 20:59:57 1.1
+++ cluster/group/gfs_controld/Makefile 2006/06/15 20:41:46 1.2
@@ -22,8 +22,7 @@
-I../include/ \
-I../lib/ \
-I../../cman/lib/ \
- -I../../cman/daemon/openais/trunk/include/ \
- -I../../dlm/lib/
+ -I../../cman/daemon/openais/trunk/include/
TARGET=gfs_controld
@@ -38,8 +37,6 @@
group.o \
plock.o \
recover.o \
- withdraw.o \
- ../../dlm/lib/libdlm_lt.a \
../../cman/lib/libcman.a \
../../cman/daemon/openais/trunk/lib/libcpg.a \
../lib/libgroup.a
--- cluster/group/gfs_controld/cpg.c 2006/06/15 15:27:43 1.2
+++ cluster/group/gfs_controld/cpg.c 2006/06/15 20:41:46 1.3
@@ -24,6 +24,7 @@
void receive_options(struct mountgroup *mg, char *buf, int len, int from);
void receive_remount(struct mountgroup *mg, char *buf, int len, int from);
void receive_plock(struct mountgroup *mg, char *buf, int len, int from);
+void receive_withdraw(struct mountgroup *mg, char *buf, int len, int from);
void receive_recovery_status(struct mountgroup *mg, char *buf, int len,
int from);
void receive_recovery_done(struct mountgroup *mg, char *buf, int len, int from);
@@ -88,6 +89,10 @@
receive_recovery_done(mg, data, len, nodeid);
break;
+ case MSG_WITHDRAW:
+ receive_withdraw(mg, data, len, nodeid);
+ break;
+
default:
log_error("unknown message type %d from %d",
hd->type, hd->nodeid);
--- cluster/group/gfs_controld/group.c 2006/06/09 20:59:57 1.1
+++ cluster/group/gfs_controld/group.c 2006/06/15 20:41:46 1.2
@@ -147,8 +147,6 @@
log_debug("groupd callback: terminate %s", cb_name);
mg->last_callback = DO_TERMINATE;
do_terminate(mg);
- list_del(&mg->list);
- free(mg);
break;
case DO_SETID:
--- cluster/group/gfs_controld/lock_dlm.h 2006/06/15 15:27:43 1.2
+++ cluster/group/gfs_controld/lock_dlm.h 2006/06/15 20:41:46 1.3
@@ -36,7 +36,6 @@
#include "list.h"
#include "linux_endian.h"
#include "libgroup.h"
-#include "libdlm.h"
#define MAXARGS 64
#define MAXLINE 256
@@ -184,11 +183,10 @@
int wait_gfs_recover_done;
int gone_event;
int gone_type;
- int mount_finished;
+ int finished;
int local_recovery_status;
int recovery_status;
- int withdraw;
- struct dlm_lksb wd_lksb;
+ int withdrawing;
int needs_journals;
};
@@ -197,6 +195,7 @@
MSG_OPTIONS,
MSG_REMOUNT,
MSG_PLOCK,
+ MSG_WITHDRAW,
MSG_RECOVERY_STATUS,
MSG_RECOVERY_DONE,
};
@@ -223,12 +222,9 @@
int process_cpg(void);
int setup_groupd(void);
int process_groupd(void);
-int setup_libdlm(void);
-int process_libdlm(void);
int setup_plocks(void);
int process_plocks(void);
void exit_cman(void);
-void exit_libdlm(void);
int do_mount(int ci, char *dir, char *type, char *proto, char *table,
char *options);
--- cluster/group/gfs_controld/main.c 2006/06/15 15:27:43 1.2
+++ cluster/group/gfs_controld/main.c 2006/06/15 20:41:46 1.3
@@ -29,10 +29,10 @@
static int listen_fd;
static int groupd_fd;
static int uevent_fd;
-static int libdlm_fd;
static int plocks_fd;
extern struct list_head mounts;
+extern struct list_head withdrawn_mounts;
int no_withdraw;
static void make_args(char *buf, int *argc, char **argv, char sep)
@@ -266,14 +266,6 @@
goto out;
client_add(uevent_fd, &maxi);
- if (no_withdraw)
- goto next;
-
- rv = libdlm_fd = setup_libdlm();
- if (rv < 0)
- goto next;
- client_add(libdlm_fd, &maxi);
- next:
rv = plocks_fd = setup_plocks();
if (rv < 0)
goto out;
@@ -309,9 +301,6 @@
process_cpg();
else if (pollfd[i].fd == uevent_fd)
process_uevent();
- else if (!no_withdraw &&
- pollfd[i].fd == libdlm_fd)
- process_libdlm();
else if (pollfd[i].fd == plocks_fd)
process_plocks();
else
@@ -456,6 +445,7 @@
{
prog_name = argv[0];
INIT_LIST_HEAD(&mounts);
+ INIT_LIST_HEAD(&withdrawn_mounts);
client_init();
decode_arguments(argc, argv);
--- cluster/group/gfs_controld/member_cman.c 2006/06/09 20:59:57 1.1
+++ cluster/group/gfs_controld/member_cman.c 2006/06/15 20:41:46 1.2
@@ -43,10 +43,7 @@
void exit_cman(void)
{
- /* do we want to try to forcibly clean some stuff up
- in the kernel here? */
log_error("cluster is down, exiting");
- exit_libdlm();
exit(1);
}
--- cluster/group/gfs_controld/recover.c 2006/06/15 15:27:43 1.2
+++ cluster/group/gfs_controld/recover.c 2006/06/15 20:41:46 1.3
@@ -26,14 +26,12 @@
extern char *clustername;
extern int our_nodeid;
extern group_handle_t gh;
+extern int no_withdraw;
struct list_head mounts;
+struct list_head withdrawn_mounts;
void send_journals(struct mountgroup *mg, int nodeid);
-int hold_withdraw_locks(struct mountgroup *mg);
-void release_withdraw_lock(struct mountgroup *mg, struct mg_member *memb);
-void release_withdraw_locks(struct mountgroup *mg);
-
void start_participant_init_2(struct mountgroup *mg);
void start_spectator_init_2(struct mountgroup *mg);
void start_spectator_2(struct mountgroup *mg);
@@ -146,6 +144,46 @@
mg->remount_client = 0;
}
+/* Send a MSG_WITHDRAW message for this mountgroup through
+   send_group_message().
+
+   The message consists only of a gdlm_header with type MSG_WITHDRAW,
+   nodeid set to our_nodeid and to_nodeid set to 0.
+
+   The header is built on the stack instead of with malloc(): a failed
+   allocation used to make this function return silently without sending
+   anything, even though the caller had already marked the mountgroup as
+   withdrawing.  The buffer only has to stay valid until
+   send_group_message() returns (the earlier code freed its buffer right
+   after that call). */
+
+void send_withdraw(struct mountgroup *mg)
+{
+	struct gdlm_header hd;
+
+	/* zero everything first so fields we don't set are well defined */
+	memset(&hd, 0, sizeof(hd));
+	hd.type = MSG_WITHDRAW;
+	hd.nodeid = our_nodeid;
+	hd.to_nodeid = 0;
+
+	log_group(mg, "send_withdraw");
+
+	send_group_message(mg, (int) sizeof(hd), (char *) &hd);
+}
+
+/* Handle a MSG_WITHDRAW message that arrived from node "from".
+
+   The sending member is looked up in the mountgroup and flagged as
+   withdrawing.  When the message is our own (from == our_nodeid) we also
+   call group_leave() for this mountgroup.  A sender that is not a known
+   member is logged and otherwise ignored.  buf and len are not used. */
+
+void receive_withdraw(struct mountgroup *mg, char *buf, int len, int from)
+{
+	struct mg_member *sender = find_memb_nodeid(mg, from);
+
+	if (sender == NULL) {
+		log_group(mg, "receive_withdraw no member %d", from);
+		return;
+	}
+
+	sender->withdrawing = 1;
+
+	/* only the withdrawing node itself leaves the group */
+	if (from != our_nodeid)
+		return;
+
+	group_leave(gh, mg->name);
+}
+
#define SEND_RS_INTS 3
void send_recovery_status(struct mountgroup *mg)
@@ -267,6 +305,8 @@
return "MSG_RECOVERY_STATUS";
case MSG_RECOVERY_DONE:
return "MSG_RECOVERY_DONE";
+ case MSG_WITHDRAW:
+ return "MSG_WITHDRAW";
}
return "unknown";
}
@@ -911,7 +951,7 @@
- no journal cb if we've already done a journl cb */
if ((memb->gone_type == GROUP_NODE_FAILED ||
- memb->withdraw) &&
+ memb->withdrawing) &&
memb->jid != JID_INIT &&
!memb->spectator &&
!memb->wait_gfs_recover_done) {
@@ -925,7 +965,7 @@
memb->nodeid, memb->tell_gfs_to_recover,
mg->spectator,
mg->start_type,
- memb->withdraw,
+ memb->withdrawing,
memb->jid,
memb->spectator,
memb->wait_gfs_recover_done);
@@ -944,7 +984,7 @@
}
list_for_each_entry(memb, &mg->members, list) {
- if (!memb->mount_finished)
+ if (!memb->finished)
continue;
if (low == -1 || memb->nodeid < low)
low = memb->nodeid;
@@ -1186,7 +1226,12 @@
struct mg_member *memb;
int rv;
- if (mg->spectator || mg->readonly || mg->our_jid == JID_INIT) {
+ /* we can't do journal recovery if: we're a spectator or readonly
+ mount, gfs is currently withdrawing, or we're mounting and haven't
+ received a journals message yet */
+
+ if (mg->spectator || mg->readonly || mg->withdraw ||
+ mg->our_jid == JID_INIT) {
list_for_each_entry(memb, &mg->members_gone, list) {
if (!memb->tell_gfs_to_recover)
continue;
@@ -1406,11 +1451,25 @@
{
struct mountgroup *mg;
+ list_for_each_entry(mg, &withdrawn_mounts, list) {
+ if (!strcmp(mg->dir, dir)) {
+ log_group(mg, "unmount withdrawn fs");
+ list_del(&mg->list);
+ free(mg);
+ return 0;
+ }
+ }
+
mg = find_mg_dir(dir);
if (!mg) {
log_error("do_unmount: unknown mount dir %s", dir);
return -1;
}
+
+ if (mg->withdraw) {
+ log_error("do_unmount: fs on %s is withdrawing", dir);
+ return -1;
+ }
/* Check to see if we're waiting for a kernel recovery_done to do a
start_done(). If so, call the start_done() here because we won't be
@@ -1567,9 +1626,6 @@
from members_gone if their journals have been recovered */
list_for_each_entry_safe(memb, safe, &mg->members_gone, list) {
- if (!memb->withdraw)
- release_withdraw_lock(mg, memb);
-
if (!memb->recovery_status) {
list_del(&memb->list);
free(memb);
@@ -1588,18 +1644,8 @@
}
}
- list_for_each_entry(memb, &mg->members, list) {
- memb->mount_finished = 1;
-
- /* If there are still withdrawing nodes that haven't left
- the group, we need to keep lock requests blocked */
-
- if (memb->withdraw) {
- log_group(mg, "finish: leave locks blocked for "
- "withdrawing node %d", memb->nodeid);
- leave_blocked = 1;
- }
- }
+ list_for_each_entry(memb, &mg->members, list)
+ memb->finished = 1;
if (mg->needs_recovery) {
log_group(mg, "finish: leave locks blocked for needs_recovery");
@@ -1674,7 +1720,6 @@
mg->first_mounter_done = 0;
mg->got_our_options = 1;
mg->got_our_journals = 1;
- hold_withdraw_locks(mg);
}
start_done(mg);
notify_mount_client(mg);
@@ -1688,7 +1733,6 @@
log_group(mg, "start_participant_init");
set_our_memb_options(mg);
send_options(mg);
- hold_withdraw_locks(mg);
start_done(mg);
mg->start2_fn = start_participant_init_2;
}
@@ -1732,8 +1776,6 @@
log_group(mg, "start_participant pos=%d neg=%d", pos, neg);
if (pos) {
- hold_withdraw_locks(mg);
-
/* If we're the first mounter, and we're adding a second
node here, but haven't gotten first_done (others_may_mount)
from gfs yet, then don't do the start_done() to complete
@@ -1765,7 +1807,6 @@
log_group(mg, "start_spectator_init");
set_our_memb_options(mg);
send_options(mg);
- hold_withdraw_locks(mg);
start_done(mg);
mg->start2_fn = start_spectator_init_2;
}
@@ -1795,7 +1836,6 @@
log_group(mg, "start_spectator pos=%d neg=%d", pos, neg);
if (pos) {
- hold_withdraw_locks(mg);
start_done(mg);
process_saved_options(mg);
} else if (neg) {
@@ -1937,12 +1977,57 @@
that needs journal recovery, we have a problem because we wait to
call group_start_done() until gfs in the kernel to signal that
the journal recovery is done. If we've unmounted gfs isn't there
- any more to give us this signal and we'll never call start_done. */
+ any more to give us this signal and we'll never call start_done.
+
+ update: we should be dealing with all these issues correctly now. */
int do_terminate(struct mountgroup *mg)
{
- log_group(mg, "termination of our unmount leave");
- release_withdraw_locks(mg);
+	/* Runs when groupd reports termination of our leave (the
+	   DO_TERMINATE callback).  Always returns 0. */
+
+	/* FIXME: all group members aren't guaranteed to be stopped for
+	   our leave yet when we get terminate.  We need that guarantee
+	   before we tell a withdrawing gfs to drop locks. */
+
+	if (!mg->withdraw) {
+		/* ordinary unmount leave: unlink the mountgroup and free it */
+		log_group(mg, "termination of our unmount leave");
+		list_del(&mg->list);
+		free(mg);
+		return 0;
+	}
+
+	/* withdraw leave: set the "withdraw" sysfs value to 1 and move the
+	   mountgroup onto withdrawn_mounts instead of freeing it */
+	log_group(mg, "termination of our withdraw leave");
+	set_sysfs(mg, "withdraw", 1);
+	list_move(&mg->list, &withdrawn_mounts);
+
+	return 0;
+}
+
+/* The basic rule of withdraw is that we don't want to tell the kernel to drop
+   all locks until we know gfs has been stopped/blocked on all nodes.  They'll
+   be stopped for our leave, we just need to know when they've all arrived
+   there.
+
+   A withdrawing node is very much like a readonly node, differences are
+   that others recover its journal when they remove it from the group,
+   and when it's been removed from the group (gets terminate for its leave),
+   it tells the locally withdrawing gfs to clear out locks.
+
+   do_withdraw() starts a withdraw for the mountgroup named by the part of
+   "table" that follows its first ':'.  It marks the mountgroup as
+   withdrawing and calls send_withdraw() for it.
+
+   Returns 0 after calling send_withdraw(), and also 0 (after logging an
+   error) when the withdraw feature is disabled via no_withdraw.  Returns -1
+   when table contains no ':' or when no mountgroup has that name. */
+
+int do_withdraw(char *table)
+{
+	struct mountgroup *mg;
+	char *name;
+
+	if (no_withdraw) {
+		log_error("withdraw feature not enabled");
+		return 0;
+	}
+
+	/* strstr() returns NULL when table has no ':'.  Check for that before
+	   stepping past the separator, rather than computing NULL + 1 and
+	   handing the resulting bogus pointer to find_mg(). */
+
+	name = strstr(table, ":");
+	if (!name) {
+		log_error("do_withdraw no ':' in table name %s", table);
+		return -1;
+	}
+	name++;
+
+	mg = find_mg(name);
+	if (!mg) {
+		log_error("do_withdraw no mountgroup %s", name);
+		return -1;
+	}
+
+	mg->withdraw = 1;
+	send_withdraw(mg);
return 0;
}
reply other threads:[~2006-06-15 20:41 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20060615204147.14251.qmail@sourceware.org \
--to=teigland@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.