All of lore.kernel.org
 help / color / mirror / Atom feed
From: bmarzins@sourceware.org <bmarzins@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/gnbd client/Makefile client/gnbd_monit ...
Date: 13 Oct 2006 22:32:33 -0000	[thread overview]
Message-ID: <20061013223233.6038.qmail@sourceware.org> (raw)

CVSROOT:	/cvs/cluster
Module name:	cluster
Changes by:	bmarzins at sourceware.org	2006-10-13 22:32:30

Modified files:
	gnbd/client    : Makefile gnbd_monitor.c gnbd_monitor.h 
	gnbd/server    : Makefile gnbd_clusterd.c 
Removed files:
	gnbd/utils     : group.c group.h 

Log message:
	Make gnbd work correctly with cman. This loosely falls under the heading
	of bz #210415.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gnbd/client/Makefile.diff?cvsroot=cluster&r1=1.9&r2=1.10
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gnbd/client/gnbd_monitor.c.diff?cvsroot=cluster&r1=1.13&r2=1.14
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gnbd/client/gnbd_monitor.h.diff?cvsroot=cluster&r1=1.3&r2=1.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gnbd/server/Makefile.diff?cvsroot=cluster&r1=1.9&r2=1.10
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gnbd/server/gnbd_clusterd.c.diff?cvsroot=cluster&r1=1.5&r2=1.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gnbd/utils/group.c.diff?cvsroot=cluster&r1=1.2&r2=NONE
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gnbd/utils/group.h.diff?cvsroot=cluster&r1=1.1&r2=NONE

--- cluster/gnbd/client/Makefile	2006/08/11 15:18:14	1.9
+++ cluster/gnbd/client/Makefile	2006/10/13 22:32:30	1.10
@@ -19,12 +19,11 @@
 	$(top_srcdir)/utils/gnbd_utils.c
 
 MONITOR_SRC= gnbd_monitor.c monitor_req.c $(top_srcdir)/utils/trans.c \
-	$(top_srcdir)/utils/gnbd_utils.c $(top_srcdir)/utils/group.c \
+	$(top_srcdir)/utils/gnbd_utils.c \
 	$(top_srcdir)/utils/member_cman.c
 
 INCLUDE= -I$(top_srcdir)/include -I$(top_srcdir)/server -I$(top_srcdir)/utils \
-	-I${top_srcdir}/config -I${gnbdkincdir} -I${incdir} -I${cmanincdir} \
-	-I../../group/lib
+	-I${top_srcdir}/config -I${gnbdkincdir} -I${incdir} -I${cmanincdir}
 
 ifneq (${KERNEL_SRC}, )
 # Use the kernel tree if patched, otherwise, look where cluster headers
@@ -33,7 +32,7 @@
 		echo '-I${KERNEL_SRC}/include'; fi)
 endif
 
-LDLIBS+= -L${libdir} -L${cmanincdir} -L../../group/lib -lcman -lgroup -ldl -lpthread
+LDLIBS+= -L${libdir} -L${cmanincdir} -lcman -ldl -lpthread
 
 CFLAGS+= -O2 -DGNBD_RELEASE_NAME=\"${RELEASE}\"
 
--- cluster/gnbd/client/gnbd_monitor.c	2006/08/11 15:18:14	1.13
+++ cluster/gnbd/client/gnbd_monitor.c	2006/10/13 22:32:30	1.14
@@ -28,7 +28,6 @@
 #include <netinet/in.h>
 
 #include "gnbd.h"
-#include "group.h"
 #include "member_cman.h"
 #include "gnbd_endian.h"
 #include "list.h"
@@ -52,8 +51,15 @@
 };
 typedef struct waiter_s waiter_t;
 
+struct down_node_s {
+  int nodeid;
+  list_t list;
+};
+typedef struct down_node_s down_node_t;
+
 #define MAX_NODES 256
 
+list_decl(down_node_list);
 list_decl(waiter_list);
 connection_t *connections;
 struct pollfd *polls;
@@ -63,8 +69,6 @@
 cman_handle_t ch;
 cman_node_t nodes[MAX_NODES];
 int num_nodes;
-cman_node_t old_nodes[MAX_NODES];
-int old_num_nodes;
 int cman_cb;
 int cman_reason;
 
@@ -74,7 +78,6 @@
 
 #define CLUSTER 0
 #define CONNECT 1
-#define GROUP 2
 
 list_t monitor_list;
 
@@ -159,15 +162,7 @@
   connections[CLUSTER].dev = -1;
   polls[CONNECT].fd = start_comm_device("gnbd_monitorcomm");
   polls[CONNECT].events = POLLIN;
-  polls[GROUP].fd = setup_groupd("gnbd_monitor");
-  if (polls[GROUP].fd < 0)
-    fail_startup("cannot get group fd\n");
-  polls[GROUP].events = POLLIN;
-  connections[GROUP].buf = NULL;
-  connections[GROUP].action = 0;
-  connections[GROUP].size = 0;
-  connections[GROUP].dev = -1;
-  for(i = 3; i < open_max(); i++){
+  for(i = 2; i < open_max(); i++){
     polls[i].fd = -1;
     polls[i].revents = 0;
   }
@@ -186,10 +181,6 @@
     /* FIXME -- again, don't do this */
     exit(1);
   }
-  if (index == GROUP){
-    log_err("lost connection to groupd\n");
-     exit(1);
-  }
   polls[index].fd = -1;
   polls[index].revents = 0;
   free(connections[index].buf);
@@ -356,6 +347,7 @@
   waiter_t *waiter;
 
   block_sigchld();
+  dev->state = FAILED_STATE;
   
   list_foreach_safe(list_item, &waiter_list, tmp) {
     waiter = list_entry(list_item, waiter_t, list);
@@ -375,14 +367,75 @@
   unblock_sigchld();
 }
 
-static void statechange(void)
+static void fail_devices(char *node)
 {
-  int ret;
   monitor_t *dev;
+  list_t *item;
+
+  list_foreach(item, &monitor_list) {
+    dev = list_entry(item, monitor_t, list);
+    if (strcmp(dev->server, node) == 0)
+      fail_device(dev);
+  }
+}
+
+static char *nodeid_to_name(int nodeid)
+{
+  int i;
+
+  for(i = 0; i < num_nodes; i++)
+    if (nodes[i].cn_nodeid == nodeid)
+      return nodes[i].cn_name;
+  log_err("cannot find node that matches nodeid %d\n", nodeid);
+  exit(1);
+}
+
+static void check_down_nodes(void)
+{
+  uint64_t fence_time;
+  int fenced;
+  down_node_t *node;
   list_t *item, *next;
+  
+  list_foreach_safe(item, &down_node_list, next){
+    node = list_entry(item, down_node_t, list);
+    if (cman_get_fenceinfo(ch, node->nodeid, &fence_time, &fenced, NULL) < 0) {
+      log_err("cannot get fence info for nodeid %d : %s\n", node->nodeid,
+              strerror(errno));
+      exit(1);
+    }
+    if (fenced){
+      fail_devices(nodeid_to_name(node->nodeid));
+      list_del(&node->list);
+      free(node);
+    }
+  }
+}
 
-  old_num_nodes = num_nodes;
-  memcpy(&old_nodes, &nodes, sizeof(old_nodes));
+static down_node_t *get_down_node(int nodeid)
+{
+  list_t *item;
+  down_node_t *node;
+
+  list_foreach(item, &down_node_list) {
+    node = list_entry(item, down_node_t, list);
+    if (node->nodeid == nodeid)
+      return node;
+  }
+  return NULL;
+}
+
+static void get_initial_nodelist(void)
+{
+  if (cman_get_nodes(ch, MAX_NODES, &num_nodes, nodes) < 0) {
+    log_err("can't get initial cluster node list : %s\n", strerror(errno));
+    exit(1);
+  }
+}
+
+static void statechange(void)
+{
+  int ret, i;
 
   num_nodes = 0;
   memset(&nodes, 0, sizeof(nodes));
@@ -391,13 +444,41 @@
     log_err("can't get cluster node list : %s\n", strerror(errno));
     exit(1);
   }
-  list_foreach_safe(item, &monitor_list, next){
-    dev = list_entry(item, monitor_t, list);
-    if (check_for_node(old_nodes, old_num_nodes, dev->server) &&
-        !check_for_node(nodes,  num_nodes, dev->server))
-      fail_device(dev);
+  for (i = 0; i < num_nodes; i++){
+    if (nodes[i].cn_member) {
+      down_node_t *node = get_down_node(nodes[i].cn_nodeid);
+      if (!node)
+        continue;
+      fail_devices(nodes[i].cn_name);
+      list_del(&node->list);
+      free(node);
+    }
+    else {
+      monitor_t *dev;
+      list_t *item;
+      if (get_down_node(nodes[i].cn_nodeid))
+        continue;
+      list_foreach(item, &monitor_list) {
+        down_node_t *node;
+        dev = list_entry(item, monitor_t, list);
+	if (strcmp(dev->server, nodes[i].cn_name) != 0)
+          continue;
+        if (dev->state == RESET_STATE || dev->state == RESTARTABLE_STATE ||
+            dev->state == FAILED_STATE)
+          continue;
+        node = malloc(sizeof(down_node_t));
+        if (!node) {
+          log_err("cannot allocate memory for down node %s\n",
+                  nodes[i].cn_name);
+          exit(1);
+        }
+        node->nodeid = nodes[i].cn_nodeid;
+        list_add(&node->list, &down_node_list);
+        break;
+      }
+    }
   }
-}
+}       
 
 void handle_cluster_msg(void)
 {
@@ -608,7 +689,7 @@
     exit(1);
   for(i = open_max()-1; i > 2; --i) 
     close(i);
-  execlp("gnbd_recvd", "gnbd_recvd", "-f", "-d", minor_str);
+  execlp("gnbd_recvd", "gnbd_recvd", "-f", "-d", minor_str, NULL);
   exit(1);
 }
 
@@ -661,9 +742,9 @@
             log_err("cman_admin_init failure : %s\n", strerror(errno));
             goto cant_fence;
           }
-          if (cman_kill_node(ch, server->cn_nodeid) < 0){
+          if (cman_kill_node(ach, server->cn_nodeid) < 0){
             log_err("fence of %s failed : %s\n", dev->server, strerror(errno));
-            cman_finish(ch);
+            cman_finish(ach);
             goto cant_fence;
           }
           cman_finish(ach);
@@ -685,6 +766,7 @@
         start_recvd(dev);
       break;
     /* FENCED_STATE */
+    /* FAILED_STATE */
     }
   }
 }
@@ -717,6 +799,9 @@
     case FENCED_STATE:
       strcpy(state, "fenced");
       break;
+    case FAILED_STATE:
+      strcpy(state, "failed");
+      break;
     }
     printf("%8d   %7d   %s\n", ptr->minor_nr, ptr->timeout, state);
   }
@@ -734,8 +819,11 @@
       log_err("poll error : %s\n", strerror(errno));
     return;
   }
-  if (err == 0)
+  if (err == 0) {
     check_devices();
+    check_down_nodes();
+    return;
+  }
   for (i = 0; i <= max_id; i++){
     if (polls[i].revents & (POLLERR | POLLHUP | POLLNVAL)){
       log_err("Bad poll result, 0x%x on id %d\n", polls[i].revents, i);
@@ -747,8 +835,6 @@
         accept_connection();
       else if (i == CLUSTER)
         handle_cluster_msg();
-      else if (i == GROUP)
-        default_process_groupd();
       else
         handle_msg(i);
     }
@@ -819,14 +905,13 @@
   list_init(&monitor_list);
 
   setup_poll();
-
   err = monitor_device(minor_nr, timeout, argv[3]);
   if (err)
     fail_startup("cannot add device #%d to monitor_list : %s\n", minor_nr,
                  strerror(err));
   
   finish_startup("gnbd_monitor started. Monitoring device #%d\n", minor_nr);
-  
+  get_initial_nodelist();  
   while(1){
     do_poll();
   }
--- cluster/gnbd/client/gnbd_monitor.h	2004/08/14 01:33:20	1.3
+++ cluster/gnbd/client/gnbd_monitor.h	2006/10/13 22:32:30	1.4
@@ -23,6 +23,7 @@
 #define RESET_STATE 2
 #define RESTARTABLE_STATE 3
 #define FENCED_STATE 4
+#define FAILED_STATE 5
 
 struct monitor_info_s {
   int minor_nr;
--- cluster/gnbd/server/Makefile	2006/08/11 15:18:14	1.9
+++ cluster/gnbd/server/Makefile	2006/10/13 22:32:30	1.10
@@ -17,16 +17,15 @@
 include ${top_srcdir}/make/defines.mk
 
 CLU_SOURCE= gnbd_clusterd.c $(top_srcdir)/utils/gnbd_utils.c \
-	$(top_srcdir)/utils/member_cman.c $(top_srcdir)/utils/group.c
+	$(top_srcdir)/utils/member_cman.c
 
-LDLIBS+= -L${libdir} -L${cmanincdir} -L../../group/lib -lcman -lgroup -ldl -lpthread
+LDLIBS+= -L${libdir} -L${cmanincdir} -lcman -ldl -lpthread
 
 SRV_SOURCE= gnbd_serv.c local_req.c extern_req.c device.c gserv.c fence.c \
 	$(top_srcdir)/utils/trans.c $(top_srcdir)/utils/gnbd_utils.c
 
-INCLUDE= -I$(top_srcdir)/include -I$(top_srcdir)/utils -I${groupincdir}\
-	-I${top_srcdir}/config -I${gnbdkincdir} -I${incdir} -I${cmanincdir} \
-	-I../../group/lib
+INCLUDE= -I$(top_srcdir)/include -I$(top_srcdir)/utils \
+	-I${top_srcdir}/config -I${gnbdkincdir} -I${incdir} -I${cmanincdir}
 
 ifneq (${KERNEL_SRC}, )
 # Use the kernel tree if patched, otherwise, look where cluster headers
--- cluster/gnbd/server/gnbd_clusterd.c	2006/05/16 19:08:17	1.5
+++ cluster/gnbd/server/gnbd_clusterd.c	2006/10/13 22:32:30	1.6
@@ -20,15 +20,12 @@
 
 #include "gnbd_utils.h"
 #include "member_cman.h"
-#include "group.h"
 
 
 #define CMAN 0
-#define GROUP 1
 
-struct pollfd polls[2];
+struct pollfd polls[1];
 static int quit = 0;
-group_callbacks_t callbacks;
 
 static void sig_usr1(int sig)
 {}
@@ -56,28 +53,16 @@
 {
   polls[CMAN].fd = setup_member(NULL);
   if (polls[CMAN].fd < 0)
-    finish_startup("cannot join cman\n");
-  polls[GROUP].fd = setup_groupd("gnbd_clusterd");
-  if (polls[GROUP].fd < 0) {
-    exit_member();
-    fail_startup("cannot init group\n");
-  }
-  if (group_join(gh, "default")) {
-    exit_groupd();
-    exit_member();
-    fail_startup("cannot join group\n");
-  }
+    fail_startup("cannot join cman\n");
   polls[CMAN].events = POLLIN;
   polls[CMAN].revents = 0;
-  polls[GROUP].events = POLLIN;
-  polls[GROUP].revents = 0;
 }
 
 void do_poll(void)
 {
   int err;
 
-  err = poll(polls, 2, -1);
+  err = poll(polls, 1, -1);
   if (err < 0) {
     if (errno != EINTR)
       log_err("poll error : %s\n", strerror(errno));
@@ -87,15 +72,9 @@
     log_err("Bad poll result 0x%x from cluster\n", polls[CMAN].revents);
     exit(1);
   }
-  if (polls[GROUP].revents & (POLLERR | POLLHUP | POLLNVAL)) {
-    log_err("Bad poll result 0x%x from groupd\n", polls[GROUP].revents);
-    exit(1);
-  }
 
   if (polls[CMAN].revents & POLLIN)
     default_process_member();
-  if (polls[GROUP].revents & POLLIN)
-    default_process_groupd();
 }
     
 int main(int argc, char **argv){
@@ -137,8 +116,6 @@
   while(!quit){
     do_poll();
   }
-  group_leave(gh, "default");
-  group_exit(gh);
   cman_finish(ch);
   return 0;
 } 



                 reply	other threads:[~2006-10-13 22:32 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20061013223233.6038.qmail@sourceware.org \
    --to=bmarzins@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.