From: bmarzins@sourceware.org <bmarzins@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/gnbd client/Makefile client/gnbd_monit ...
Date: 13 Oct 2006 22:32:33 -0000 [thread overview]
Message-ID: <20061013223233.6038.qmail@sourceware.org> (raw)
CVSROOT: /cvs/cluster
Module name: cluster
Changes by: bmarzins at sourceware.org 2006-10-13 22:32:30
Modified files:
gnbd/client : Makefile gnbd_monitor.c gnbd_monitor.h
gnbd/server : Makefile gnbd_clusterd.c
Removed files:
gnbd/utils : group.c group.h
Log message:
Make gnbd work correctly with cman. This change is loosely related to
bz #210415.
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gnbd/client/Makefile.diff?cvsroot=cluster&r1=1.9&r2=1.10
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gnbd/client/gnbd_monitor.c.diff?cvsroot=cluster&r1=1.13&r2=1.14
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gnbd/client/gnbd_monitor.h.diff?cvsroot=cluster&r1=1.3&r2=1.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gnbd/server/Makefile.diff?cvsroot=cluster&r1=1.9&r2=1.10
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gnbd/server/gnbd_clusterd.c.diff?cvsroot=cluster&r1=1.5&r2=1.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gnbd/utils/group.c.diff?cvsroot=cluster&r1=1.2&r2=NONE
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gnbd/utils/group.h.diff?cvsroot=cluster&r1=1.1&r2=NONE
--- cluster/gnbd/client/Makefile 2006/08/11 15:18:14 1.9
+++ cluster/gnbd/client/Makefile 2006/10/13 22:32:30 1.10
@@ -19,12 +19,11 @@
$(top_srcdir)/utils/gnbd_utils.c
MONITOR_SRC= gnbd_monitor.c monitor_req.c $(top_srcdir)/utils/trans.c \
- $(top_srcdir)/utils/gnbd_utils.c $(top_srcdir)/utils/group.c \
+ $(top_srcdir)/utils/gnbd_utils.c \
$(top_srcdir)/utils/member_cman.c
INCLUDE= -I$(top_srcdir)/include -I$(top_srcdir)/server -I$(top_srcdir)/utils \
- -I${top_srcdir}/config -I${gnbdkincdir} -I${incdir} -I${cmanincdir} \
- -I../../group/lib
+ -I${top_srcdir}/config -I${gnbdkincdir} -I${incdir} -I${cmanincdir}
ifneq (${KERNEL_SRC}, )
# Use the kernel tree if patched, otherwise, look where cluster headers
@@ -33,7 +32,7 @@
echo '-I${KERNEL_SRC}/include'; fi)
endif
-LDLIBS+= -L${libdir} -L${cmanincdir} -L../../group/lib -lcman -lgroup -ldl -lpthread
+LDLIBS+= -L${libdir} -L${cmanincdir} -lcman -ldl -lpthread
CFLAGS+= -O2 -DGNBD_RELEASE_NAME=\"${RELEASE}\"
--- cluster/gnbd/client/gnbd_monitor.c 2006/08/11 15:18:14 1.13
+++ cluster/gnbd/client/gnbd_monitor.c 2006/10/13 22:32:30 1.14
@@ -28,7 +28,6 @@
#include <netinet/in.h>
#include "gnbd.h"
-#include "group.h"
#include "member_cman.h"
#include "gnbd_endian.h"
#include "list.h"
@@ -52,8 +51,15 @@
};
typedef struct waiter_s waiter_t;
+struct down_node_s {
+ int nodeid;
+ list_t list;
+};
+typedef struct down_node_s down_node_t;
+
#define MAX_NODES 256
+list_decl(down_node_list);
list_decl(waiter_list);
connection_t *connections;
struct pollfd *polls;
@@ -63,8 +69,6 @@
cman_handle_t ch;
cman_node_t nodes[MAX_NODES];
int num_nodes;
-cman_node_t old_nodes[MAX_NODES];
-int old_num_nodes;
int cman_cb;
int cman_reason;
@@ -74,7 +78,6 @@
#define CLUSTER 0
#define CONNECT 1
-#define GROUP 2
list_t monitor_list;
@@ -159,15 +162,7 @@
connections[CLUSTER].dev = -1;
polls[CONNECT].fd = start_comm_device("gnbd_monitorcomm");
polls[CONNECT].events = POLLIN;
- polls[GROUP].fd = setup_groupd("gnbd_monitor");
- if (polls[GROUP].fd < 0)
- fail_startup("cannot get group fd\n");
- polls[GROUP].events = POLLIN;
- connections[GROUP].buf = NULL;
- connections[GROUP].action = 0;
- connections[GROUP].size = 0;
- connections[GROUP].dev = -1;
- for(i = 3; i < open_max(); i++){
+ for(i = 2; i < open_max(); i++){
polls[i].fd = -1;
polls[i].revents = 0;
}
@@ -186,10 +181,6 @@
/* FIXME -- again, don't do this */
exit(1);
}
- if (index == GROUP){
- log_err("lost connection to groupd\n");
- exit(1);
- }
polls[index].fd = -1;
polls[index].revents = 0;
free(connections[index].buf);
@@ -356,6 +347,7 @@
waiter_t *waiter;
block_sigchld();
+ dev->state = FAILED_STATE;
list_foreach_safe(list_item, &waiter_list, tmp) {
waiter = list_entry(list_item, waiter_t, list);
@@ -375,14 +367,75 @@
unblock_sigchld();
}
-static void statechange(void)
+static void fail_devices(char *node)
{
- int ret;
monitor_t *dev;
+ list_t *item;
+
+ list_foreach(item, &monitor_list) {
+ dev = list_entry(item, monitor_t, list);
+ if (strcmp(dev->server, node) == 0)
+ fail_device(dev);
+ }
+}
+
+static char *nodeid_to_name(int nodeid)
+{
+ int i;
+
+ for(i = 0; i < num_nodes; i++)
+ if (nodes[i].cn_nodeid == nodeid)
+ return nodes[i].cn_name;
+ log_err("cannot find node that matches nodeid %d\n", nodeid);
+ exit(1);
+}
+
+static void check_down_nodes(void)
+{
+ uint64_t fence_time;
+ int fenced;
+ down_node_t *node;
list_t *item, *next;
+
+ list_foreach_safe(item, &down_node_list, next){
+ node = list_entry(item, down_node_t, list);
+ if (cman_get_fenceinfo(ch, node->nodeid, &fence_time, &fenced, NULL) < 0) {
+ log_err("cannot get fence info for nodeid %d : %s\n", node->nodeid,
+ strerror(errno));
+ exit(1);
+ }
+ if (fenced){
+ fail_devices(nodeid_to_name(node->nodeid));
+ list_del(&node->list);
+ free(node);
+ }
+ }
+}
- old_num_nodes = num_nodes;
- memcpy(&old_nodes, &nodes, sizeof(old_nodes));
+static down_node_t *get_down_node(int nodeid)
+{
+ list_t *item;
+ down_node_t *node;
+
+ list_foreach(item, &down_node_list) {
+ node = list_entry(item, down_node_t, list);
+ if (node->nodeid == nodeid)
+ return node;
+ }
+ return NULL;
+}
+
+static void get_initial_nodelist(void)
+{
+ if (cman_get_nodes(ch, MAX_NODES, &num_nodes, nodes) < 0) {
+ log_err("can't get initial cluster node list : %s\n", strerror(errno));
+ exit(1);
+ }
+}
+
+static void statechange(void)
+{
+ int ret, i;
num_nodes = 0;
memset(&nodes, 0, sizeof(nodes));
@@ -391,13 +444,41 @@
log_err("can't get cluster node list : %s\n", strerror(errno));
exit(1);
}
- list_foreach_safe(item, &monitor_list, next){
- dev = list_entry(item, monitor_t, list);
- if (check_for_node(old_nodes, old_num_nodes, dev->server) &&
- !check_for_node(nodes, num_nodes, dev->server))
- fail_device(dev);
+ for (i = 0; i < num_nodes; i++){
+ if (nodes[i].cn_member) {
+ down_node_t *node = get_down_node(nodes[i].cn_nodeid);
+ if (!node)
+ continue;
+ fail_devices(nodes[i].cn_name);
+ list_del(&node->list);
+ free(node);
+ }
+ else {
+ monitor_t *dev;
+ list_t *item;
+ if (get_down_node(nodes[i].cn_nodeid))
+ continue;
+ list_foreach(item, &monitor_list) {
+ down_node_t *node;
+ dev = list_entry(item, monitor_t, list);
+ if (strcmp(dev->server, nodes[i].cn_name) != 0)
+ continue;
+ if (dev->state == RESET_STATE || dev->state == RESTARTABLE_STATE ||
+ dev->state == FAILED_STATE)
+ continue;
+ node = malloc(sizeof(down_node_t));
+ if (!node) {
+ log_err("cannot allocate memory for down node %s\n",
+ nodes[i].cn_name);
+ exit(1);
+ }
+ node->nodeid = nodes[i].cn_nodeid;
+ list_add(&node->list, &down_node_list);
+ break;
+ }
+ }
}
-}
+}
void handle_cluster_msg(void)
{
@@ -608,7 +689,7 @@
exit(1);
for(i = open_max()-1; i > 2; --i)
close(i);
- execlp("gnbd_recvd", "gnbd_recvd", "-f", "-d", minor_str);
+ execlp("gnbd_recvd", "gnbd_recvd", "-f", "-d", minor_str, NULL);
exit(1);
}
@@ -661,9 +742,9 @@
log_err("cman_admin_init failure : %s\n", strerror(errno));
goto cant_fence;
}
- if (cman_kill_node(ch, server->cn_nodeid) < 0){
+ if (cman_kill_node(ach, server->cn_nodeid) < 0){
log_err("fence of %s failed : %s\n", dev->server, strerror(errno));
- cman_finish(ch);
+ cman_finish(ach);
goto cant_fence;
}
cman_finish(ach);
@@ -685,6 +766,7 @@
start_recvd(dev);
break;
/* FENCED_STATE */
+ /* FAILED_STATE */
}
}
}
@@ -717,6 +799,9 @@
case FENCED_STATE:
strcpy(state, "fenced");
break;
+ case FAILED_STATE:
+ strcpy(state, "failed");
+ break;
}
printf("%8d %7d %s\n", ptr->minor_nr, ptr->timeout, state);
}
@@ -734,8 +819,11 @@
log_err("poll error : %s\n", strerror(errno));
return;
}
- if (err == 0)
+ if (err == 0) {
check_devices();
+ check_down_nodes();
+ return;
+ }
for (i = 0; i <= max_id; i++){
if (polls[i].revents & (POLLERR | POLLHUP | POLLNVAL)){
log_err("Bad poll result, 0x%x on id %d\n", polls[i].revents, i);
@@ -747,8 +835,6 @@
accept_connection();
else if (i == CLUSTER)
handle_cluster_msg();
- else if (i == GROUP)
- default_process_groupd();
else
handle_msg(i);
}
@@ -819,14 +905,13 @@
list_init(&monitor_list);
setup_poll();
-
err = monitor_device(minor_nr, timeout, argv[3]);
if (err)
fail_startup("cannot add device #%d to monitor_list : %s\n", minor_nr,
strerror(err));
finish_startup("gnbd_monitor started. Monitoring device #%d\n", minor_nr);
-
+ get_initial_nodelist();
while(1){
do_poll();
}
--- cluster/gnbd/client/gnbd_monitor.h 2004/08/14 01:33:20 1.3
+++ cluster/gnbd/client/gnbd_monitor.h 2006/10/13 22:32:30 1.4
@@ -23,6 +23,7 @@
#define RESET_STATE 2
#define RESTARTABLE_STATE 3
#define FENCED_STATE 4
+#define FAILED_STATE 5
struct monitor_info_s {
int minor_nr;
--- cluster/gnbd/server/Makefile 2006/08/11 15:18:14 1.9
+++ cluster/gnbd/server/Makefile 2006/10/13 22:32:30 1.10
@@ -17,16 +17,15 @@
include ${top_srcdir}/make/defines.mk
CLU_SOURCE= gnbd_clusterd.c $(top_srcdir)/utils/gnbd_utils.c \
- $(top_srcdir)/utils/member_cman.c $(top_srcdir)/utils/group.c
+ $(top_srcdir)/utils/member_cman.c
-LDLIBS+= -L${libdir} -L${cmanincdir} -L../../group/lib -lcman -lgroup -ldl -lpthread
+LDLIBS+= -L${libdir} -L${cmanincdir} -lcman -ldl -lpthread
SRV_SOURCE= gnbd_serv.c local_req.c extern_req.c device.c gserv.c fence.c \
$(top_srcdir)/utils/trans.c $(top_srcdir)/utils/gnbd_utils.c
-INCLUDE= -I$(top_srcdir)/include -I$(top_srcdir)/utils -I${groupincdir}\
- -I${top_srcdir}/config -I${gnbdkincdir} -I${incdir} -I${cmanincdir} \
- -I../../group/lib
+INCLUDE= -I$(top_srcdir)/include -I$(top_srcdir)/utils \
+ -I${top_srcdir}/config -I${gnbdkincdir} -I${incdir} -I${cmanincdir}
ifneq (${KERNEL_SRC}, )
# Use the kernel tree if patched, otherwise, look where cluster headers
--- cluster/gnbd/server/gnbd_clusterd.c 2006/05/16 19:08:17 1.5
+++ cluster/gnbd/server/gnbd_clusterd.c 2006/10/13 22:32:30 1.6
@@ -20,15 +20,12 @@
#include "gnbd_utils.h"
#include "member_cman.h"
-#include "group.h"
#define CMAN 0
-#define GROUP 1
-struct pollfd polls[2];
+struct pollfd polls[1];
static int quit = 0;
-group_callbacks_t callbacks;
static void sig_usr1(int sig)
{}
@@ -56,28 +53,16 @@
{
polls[CMAN].fd = setup_member(NULL);
if (polls[CMAN].fd < 0)
- finish_startup("cannot join cman\n");
- polls[GROUP].fd = setup_groupd("gnbd_clusterd");
- if (polls[GROUP].fd < 0) {
- exit_member();
- fail_startup("cannot init group\n");
- }
- if (group_join(gh, "default")) {
- exit_groupd();
- exit_member();
- fail_startup("cannot join group\n");
- }
+ fail_startup("cannot join cman\n");
polls[CMAN].events = POLLIN;
polls[CMAN].revents = 0;
- polls[GROUP].events = POLLIN;
- polls[GROUP].revents = 0;
}
void do_poll(void)
{
int err;
- err = poll(polls, 2, -1);
+ err = poll(polls, 1, -1);
if (err < 0) {
if (errno != EINTR)
log_err("poll error : %s\n", strerror(errno));
@@ -87,15 +72,9 @@
log_err("Bad poll result 0x%x from cluster\n", polls[CMAN].revents);
exit(1);
}
- if (polls[GROUP].revents & (POLLERR | POLLHUP | POLLNVAL)) {
- log_err("Bad poll result 0x%x from groupd\n", polls[GROUP].revents);
- exit(1);
- }
if (polls[CMAN].revents & POLLIN)
default_process_member();
- if (polls[GROUP].revents & POLLIN)
- default_process_groupd();
}
int main(int argc, char **argv){
@@ -137,8 +116,6 @@
while(!quit){
do_poll();
}
- group_leave(gh, "default");
- group_exit(gh);
cman_finish(ch);
return 0;
}
reply other threads:[~2006-10-13 22:32 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20061013223233.6038.qmail@sourceware.org \
--to=bmarzins@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.