* [Cluster-devel] cluster/gnbd client/Makefile client/gnbd_monit ...
@ 2006-10-13 22:32 bmarzins
0 siblings, 0 replies; only message in thread
From: bmarzins @ 2006-10-13 22:32 UTC (permalink / raw)
To: cluster-devel.redhat.com
CVSROOT: /cvs/cluster
Module name: cluster
Changes by: bmarzins at sourceware.org 2006-10-13 22:32:30
Modified files:
gnbd/client : Makefile gnbd_monitor.c gnbd_monitor.h
gnbd/server : Makefile gnbd_clusterd.c
Removed files:
gnbd/utils : group.c group.h
Log message:
Make gnbd work correctly with cman. This roughly falls under the heading
of bz #210415.
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gnbd/client/Makefile.diff?cvsroot=cluster&r1=1.9&r2=1.10
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gnbd/client/gnbd_monitor.c.diff?cvsroot=cluster&r1=1.13&r2=1.14
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gnbd/client/gnbd_monitor.h.diff?cvsroot=cluster&r1=1.3&r2=1.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gnbd/server/Makefile.diff?cvsroot=cluster&r1=1.9&r2=1.10
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gnbd/server/gnbd_clusterd.c.diff?cvsroot=cluster&r1=1.5&r2=1.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gnbd/utils/group.c.diff?cvsroot=cluster&r1=1.2&r2=NONE
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gnbd/utils/group.h.diff?cvsroot=cluster&r1=1.1&r2=NONE
--- cluster/gnbd/client/Makefile 2006/08/11 15:18:14 1.9
+++ cluster/gnbd/client/Makefile 2006/10/13 22:32:30 1.10
@@ -19,12 +19,11 @@
$(top_srcdir)/utils/gnbd_utils.c
MONITOR_SRC= gnbd_monitor.c monitor_req.c $(top_srcdir)/utils/trans.c \
- $(top_srcdir)/utils/gnbd_utils.c $(top_srcdir)/utils/group.c \
+ $(top_srcdir)/utils/gnbd_utils.c \
$(top_srcdir)/utils/member_cman.c
INCLUDE= -I$(top_srcdir)/include -I$(top_srcdir)/server -I$(top_srcdir)/utils \
- -I${top_srcdir}/config -I${gnbdkincdir} -I${incdir} -I${cmanincdir} \
- -I../../group/lib
+ -I${top_srcdir}/config -I${gnbdkincdir} -I${incdir} -I${cmanincdir}
ifneq (${KERNEL_SRC}, )
# Use the kernel tree if patched, otherwise, look where cluster headers
@@ -33,7 +32,7 @@
echo '-I${KERNEL_SRC}/include'; fi)
endif
-LDLIBS+= -L${libdir} -L${cmanincdir} -L../../group/lib -lcman -lgroup -ldl -lpthread
+LDLIBS+= -L${libdir} -L${cmanincdir} -lcman -ldl -lpthread
CFLAGS+= -O2 -DGNBD_RELEASE_NAME=\"${RELEASE}\"
--- cluster/gnbd/client/gnbd_monitor.c 2006/08/11 15:18:14 1.13
+++ cluster/gnbd/client/gnbd_monitor.c 2006/10/13 22:32:30 1.14
@@ -28,7 +28,6 @@
#include <netinet/in.h>
#include "gnbd.h"
-#include "group.h"
#include "member_cman.h"
#include "gnbd_endian.h"
#include "list.h"
@@ -52,8 +51,15 @@
};
typedef struct waiter_s waiter_t;
+struct down_node_s {
+ int nodeid;
+ list_t list;
+};
+typedef struct down_node_s down_node_t;
+
#define MAX_NODES 256
+list_decl(down_node_list);
list_decl(waiter_list);
connection_t *connections;
struct pollfd *polls;
@@ -63,8 +69,6 @@
cman_handle_t ch;
cman_node_t nodes[MAX_NODES];
int num_nodes;
-cman_node_t old_nodes[MAX_NODES];
-int old_num_nodes;
int cman_cb;
int cman_reason;
@@ -74,7 +78,6 @@
#define CLUSTER 0
#define CONNECT 1
-#define GROUP 2
list_t monitor_list;
@@ -159,15 +162,7 @@
connections[CLUSTER].dev = -1;
polls[CONNECT].fd = start_comm_device("gnbd_monitorcomm");
polls[CONNECT].events = POLLIN;
- polls[GROUP].fd = setup_groupd("gnbd_monitor");
- if (polls[GROUP].fd < 0)
- fail_startup("cannot get group fd\n");
- polls[GROUP].events = POLLIN;
- connections[GROUP].buf = NULL;
- connections[GROUP].action = 0;
- connections[GROUP].size = 0;
- connections[GROUP].dev = -1;
- for(i = 3; i < open_max(); i++){
+ for(i = 2; i < open_max(); i++){
polls[i].fd = -1;
polls[i].revents = 0;
}
@@ -186,10 +181,6 @@
/* FIXME -- again, don't do this */
exit(1);
}
- if (index == GROUP){
- log_err("lost connection to groupd\n");
- exit(1);
- }
polls[index].fd = -1;
polls[index].revents = 0;
free(connections[index].buf);
@@ -356,6 +347,7 @@
waiter_t *waiter;
block_sigchld();
+ dev->state = FAILED_STATE;
list_foreach_safe(list_item, &waiter_list, tmp) {
waiter = list_entry(list_item, waiter_t, list);
@@ -375,14 +367,75 @@
unblock_sigchld();
}
-static void statechange(void)
+static void fail_devices(char *node)
{
- int ret;
monitor_t *dev;
+ list_t *item;
+
+ list_foreach(item, &monitor_list) {
+ dev = list_entry(item, monitor_t, list);
+ if (strcmp(dev->server, node) == 0)
+ fail_device(dev);
+ }
+}
+
+static char *nodeid_to_name(int nodeid)
+{
+ int i;
+
+ for(i = 0; i < num_nodes; i++)
+ if (nodes[i].cn_nodeid == nodeid)
+ return nodes[i].cn_name;
+ log_err("cannot find node that matches nodeid %d\n", nodeid);
+ exit(1);
+}
+
+static void check_down_nodes(void)
+{
+ uint64_t fence_time;
+ int fenced;
+ down_node_t *node;
list_t *item, *next;
+
+ list_foreach_safe(item, &down_node_list, next){
+ node = list_entry(item, down_node_t, list);
+ if (cman_get_fenceinfo(ch, node->nodeid, &fence_time, &fenced, NULL) < 0) {
+ log_err("cannot get fence info for nodeid %d : %s\n", node->nodeid,
+ strerror(errno));
+ exit(1);
+ }
+ if (fenced){
+ fail_devices(nodeid_to_name(node->nodeid));
+ list_del(&node->list);
+ free(node);
+ }
+ }
+}
- old_num_nodes = num_nodes;
- memcpy(&old_nodes, &nodes, sizeof(old_nodes));
+static down_node_t *get_down_node(int nodeid)
+{
+ list_t *item;
+ down_node_t *node;
+
+ list_foreach(item, &down_node_list) {
+ node = list_entry(item, down_node_t, list);
+ if (node->nodeid == nodeid)
+ return node;
+ }
+ return NULL;
+}
+
+static void get_initial_nodelist(void)
+{
+ if (cman_get_nodes(ch, MAX_NODES, &num_nodes, nodes) < 0) {
+ log_err("can't get initial cluster node list : %s\n", strerror(errno));
+ exit(1);
+ }
+}
+
+static void statechange(void)
+{
+ int ret, i;
num_nodes = 0;
memset(&nodes, 0, sizeof(nodes));
@@ -391,13 +444,41 @@
log_err("can't get cluster node list : %s\n", strerror(errno));
exit(1);
}
- list_foreach_safe(item, &monitor_list, next){
- dev = list_entry(item, monitor_t, list);
- if (check_for_node(old_nodes, old_num_nodes, dev->server) &&
- !check_for_node(nodes, num_nodes, dev->server))
- fail_device(dev);
+ for (i = 0; i < num_nodes; i++){
+ if (nodes[i].cn_member) {
+ down_node_t *node = get_down_node(nodes[i].cn_nodeid);
+ if (!node)
+ continue;
+ fail_devices(nodes[i].cn_name);
+ list_del(&node->list);
+ free(node);
+ }
+ else {
+ monitor_t *dev;
+ list_t *item;
+ if (get_down_node(nodes[i].cn_nodeid))
+ continue;
+ list_foreach(item, &monitor_list) {
+ down_node_t *node;
+ dev = list_entry(item, monitor_t, list);
+ if (strcmp(dev->server, nodes[i].cn_name) != 0)
+ continue;
+ if (dev->state == RESET_STATE || dev->state == RESTARTABLE_STATE ||
+ dev->state == FAILED_STATE)
+ continue;
+ node = malloc(sizeof(down_node_t));
+ if (!node) {
+ log_err("cannot allocate memory for down node %s\n",
+ nodes[i].cn_name);
+ exit(1);
+ }
+ node->nodeid = nodes[i].cn_nodeid;
+ list_add(&node->list, &down_node_list);
+ break;
+ }
+ }
}
-}
+}
void handle_cluster_msg(void)
{
@@ -608,7 +689,7 @@
exit(1);
for(i = open_max()-1; i > 2; --i)
close(i);
- execlp("gnbd_recvd", "gnbd_recvd", "-f", "-d", minor_str);
+ execlp("gnbd_recvd", "gnbd_recvd", "-f", "-d", minor_str, NULL);
exit(1);
}
@@ -661,9 +742,9 @@
log_err("cman_admin_init failure : %s\n", strerror(errno));
goto cant_fence;
}
- if (cman_kill_node(ch, server->cn_nodeid) < 0){
+ if (cman_kill_node(ach, server->cn_nodeid) < 0){
log_err("fence of %s failed : %s\n", dev->server, strerror(errno));
- cman_finish(ch);
+ cman_finish(ach);
goto cant_fence;
}
cman_finish(ach);
@@ -685,6 +766,7 @@
start_recvd(dev);
break;
/* FENCED_STATE */
+ /* FAILED_STATE */
}
}
}
@@ -717,6 +799,9 @@
case FENCED_STATE:
strcpy(state, "fenced");
break;
+ case FAILED_STATE:
+ strcpy(state, "failed");
+ break;
}
printf("%8d %7d %s\n", ptr->minor_nr, ptr->timeout, state);
}
@@ -734,8 +819,11 @@
log_err("poll error : %s\n", strerror(errno));
return;
}
- if (err == 0)
+ if (err == 0) {
check_devices();
+ check_down_nodes();
+ return;
+ }
for (i = 0; i <= max_id; i++){
if (polls[i].revents & (POLLERR | POLLHUP | POLLNVAL)){
log_err("Bad poll result, 0x%x on id %d\n", polls[i].revents, i);
@@ -747,8 +835,6 @@
accept_connection();
else if (i == CLUSTER)
handle_cluster_msg();
- else if (i == GROUP)
- default_process_groupd();
else
handle_msg(i);
}
@@ -819,14 +905,13 @@
list_init(&monitor_list);
setup_poll();
-
err = monitor_device(minor_nr, timeout, argv[3]);
if (err)
fail_startup("cannot add device #%d to monitor_list : %s\n", minor_nr,
strerror(err));
finish_startup("gnbd_monitor started. Monitoring device #%d\n", minor_nr);
-
+ get_initial_nodelist();
while(1){
do_poll();
}
--- cluster/gnbd/client/gnbd_monitor.h 2004/08/14 01:33:20 1.3
+++ cluster/gnbd/client/gnbd_monitor.h 2006/10/13 22:32:30 1.4
@@ -23,6 +23,7 @@
#define RESET_STATE 2
#define RESTARTABLE_STATE 3
#define FENCED_STATE 4
+#define FAILED_STATE 5
struct monitor_info_s {
int minor_nr;
--- cluster/gnbd/server/Makefile 2006/08/11 15:18:14 1.9
+++ cluster/gnbd/server/Makefile 2006/10/13 22:32:30 1.10
@@ -17,16 +17,15 @@
include ${top_srcdir}/make/defines.mk
CLU_SOURCE= gnbd_clusterd.c $(top_srcdir)/utils/gnbd_utils.c \
- $(top_srcdir)/utils/member_cman.c $(top_srcdir)/utils/group.c
+ $(top_srcdir)/utils/member_cman.c
-LDLIBS+= -L${libdir} -L${cmanincdir} -L../../group/lib -lcman -lgroup -ldl -lpthread
+LDLIBS+= -L${libdir} -L${cmanincdir} -lcman -ldl -lpthread
SRV_SOURCE= gnbd_serv.c local_req.c extern_req.c device.c gserv.c fence.c \
$(top_srcdir)/utils/trans.c $(top_srcdir)/utils/gnbd_utils.c
-INCLUDE= -I$(top_srcdir)/include -I$(top_srcdir)/utils -I${groupincdir}\
- -I${top_srcdir}/config -I${gnbdkincdir} -I${incdir} -I${cmanincdir} \
- -I../../group/lib
+INCLUDE= -I$(top_srcdir)/include -I$(top_srcdir)/utils \
+ -I${top_srcdir}/config -I${gnbdkincdir} -I${incdir} -I${cmanincdir}
ifneq (${KERNEL_SRC}, )
# Use the kernel tree if patched, otherwise, look where cluster headers
--- cluster/gnbd/server/gnbd_clusterd.c 2006/05/16 19:08:17 1.5
+++ cluster/gnbd/server/gnbd_clusterd.c 2006/10/13 22:32:30 1.6
@@ -20,15 +20,12 @@
#include "gnbd_utils.h"
#include "member_cman.h"
-#include "group.h"
#define CMAN 0
-#define GROUP 1
-struct pollfd polls[2];
+struct pollfd polls[1];
static int quit = 0;
-group_callbacks_t callbacks;
static void sig_usr1(int sig)
{}
@@ -56,28 +53,16 @@
{
polls[CMAN].fd = setup_member(NULL);
if (polls[CMAN].fd < 0)
- finish_startup("cannot join cman\n");
- polls[GROUP].fd = setup_groupd("gnbd_clusterd");
- if (polls[GROUP].fd < 0) {
- exit_member();
- fail_startup("cannot init group\n");
- }
- if (group_join(gh, "default")) {
- exit_groupd();
- exit_member();
- fail_startup("cannot join group\n");
- }
+ fail_startup("cannot join cman\n");
polls[CMAN].events = POLLIN;
polls[CMAN].revents = 0;
- polls[GROUP].events = POLLIN;
- polls[GROUP].revents = 0;
}
void do_poll(void)
{
int err;
- err = poll(polls, 2, -1);
+ err = poll(polls, 1, -1);
if (err < 0) {
if (errno != EINTR)
log_err("poll error : %s\n", strerror(errno));
@@ -87,15 +72,9 @@
log_err("Bad poll result 0x%x from cluster\n", polls[CMAN].revents);
exit(1);
}
- if (polls[GROUP].revents & (POLLERR | POLLHUP | POLLNVAL)) {
- log_err("Bad poll result 0x%x from groupd\n", polls[GROUP].revents);
- exit(1);
- }
if (polls[CMAN].revents & POLLIN)
default_process_member();
- if (polls[GROUP].revents & POLLIN)
- default_process_groupd();
}
int main(int argc, char **argv){
@@ -137,8 +116,6 @@
while(!quit){
do_poll();
}
- group_leave(gh, "default");
- group_exit(gh);
cman_finish(ch);
return 0;
}
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2006-10-13 22:32 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-10-13 22:32 [Cluster-devel] cluster/gnbd client/Makefile client/gnbd_monit bmarzins
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).