From mboxrd@z Thu Jan 1 00:00:00 1970 From: rmccabe@sourceware.org Date: 4 Sep 2007 18:28:44 -0000 Subject: [Cluster-devel] conga/ricci/modules/cluster ClusterConf.h Clus ... Message-ID: <20070904182844.20305.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: conga Changes by: rmccabe at sourceware.org 2007-09-04 18:28:40 Modified files: ricci/modules/cluster: ClusterConf.h ClusterModule.h ClusterStatus.h Clusvcadm.h Fence.h NoServiceManager.h ricci/modules/cluster/clumon/src/common: Cluster.cpp ClusterMonitor.cpp Node.cpp Service.cpp ricci/modules/cluster/clumon/src/daemon: Communicator.cpp Communicator.h Monitor.cpp Monitor.h Peer.cpp Peer.h main.cpp ricci/modules/cluster/clumon/src/include: Cluster.h ClusterMonitor.h clumond_globals.h Log message: More cleanup and fixes for mostly minor issues Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/conga/ricci/modules/cluster/ClusterConf.h.diff?cvsroot=cluster&r1=1.4&r2=1.5 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/ricci/modules/cluster/ClusterModule.h.diff?cvsroot=cluster&r1=1.2&r2=1.3 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/ricci/modules/cluster/ClusterStatus.h.diff?cvsroot=cluster&r1=1.5&r2=1.6 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/ricci/modules/cluster/Clusvcadm.h.diff?cvsroot=cluster&r1=1.4&r2=1.5 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/ricci/modules/cluster/Fence.h.diff?cvsroot=cluster&r1=1.3&r2=1.4 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/ricci/modules/cluster/NoServiceManager.h.diff?cvsroot=cluster&r1=1.2&r2=1.3 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/ricci/modules/cluster/clumon/src/common/Cluster.cpp.diff?cvsroot=cluster&r1=1.6&r2=1.7 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/ricci/modules/cluster/clumon/src/common/ClusterMonitor.cpp.diff?cvsroot=cluster&r1=1.2&r2=1.3 
http://sourceware.org/cgi-bin/cvsweb.cgi/conga/ricci/modules/cluster/clumon/src/common/Node.cpp.diff?cvsroot=cluster&r1=1.4&r2=1.5 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/ricci/modules/cluster/clumon/src/common/Service.cpp.diff?cvsroot=cluster&r1=1.3&r2=1.4 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/ricci/modules/cluster/clumon/src/daemon/Communicator.cpp.diff?cvsroot=cluster&r1=1.4&r2=1.5 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/ricci/modules/cluster/clumon/src/daemon/Communicator.h.diff?cvsroot=cluster&r1=1.3&r2=1.4 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/ricci/modules/cluster/clumon/src/daemon/Monitor.cpp.diff?cvsroot=cluster&r1=1.15&r2=1.16 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/ricci/modules/cluster/clumon/src/daemon/Monitor.h.diff?cvsroot=cluster&r1=1.6&r2=1.7 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/ricci/modules/cluster/clumon/src/daemon/Peer.cpp.diff?cvsroot=cluster&r1=1.3&r2=1.4 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/ricci/modules/cluster/clumon/src/daemon/Peer.h.diff?cvsroot=cluster&r1=1.2&r2=1.3 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/ricci/modules/cluster/clumon/src/daemon/main.cpp.diff?cvsroot=cluster&r1=1.5&r2=1.6 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/ricci/modules/cluster/clumon/src/include/Cluster.h.diff?cvsroot=cluster&r1=1.6&r2=1.7 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/ricci/modules/cluster/clumon/src/include/ClusterMonitor.h.diff?cvsroot=cluster&r1=1.2&r2=1.3 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/ricci/modules/cluster/clumon/src/include/clumond_globals.h.diff?cvsroot=cluster&r1=1.1&r2=1.2 --- conga/ricci/modules/cluster/ClusterConf.h 2007/08/23 15:30:27 1.4 +++ conga/ricci/modules/cluster/ClusterConf.h 2007/09/04 18:28:40 1.5 @@ -21,8 +21,8 @@ */ -#ifndef ClusterConf_h -#define ClusterConf_h +#ifndef __CONGA_CLUSTERCONF_H +#define __CONGA_CLUSTERCONF_H #include "XML.h" @@ -37,4 +37,4 @@ static bool is_cman(const XMLObject& cluster_conf); }; -#endif // ClusterConf_h 
+#endif --- conga/ricci/modules/cluster/ClusterModule.h 2007/08/23 15:30:27 1.2 +++ conga/ricci/modules/cluster/ClusterModule.h 2007/09/04 18:28:40 1.3 @@ -21,8 +21,8 @@ */ -#ifndef ClusterModule_h -#define ClusterModule_h +#ifndef __CONGA_CLUSTERMODULE_H +#define __CONGA_CLUSTERMODULE_H #include "Module.h" @@ -35,4 +35,4 @@ private: }; -#endif // ClusterModule_h +#endif --- conga/ricci/modules/cluster/ClusterStatus.h 2007/08/23 15:30:27 1.5 +++ conga/ricci/modules/cluster/ClusterStatus.h 2007/09/04 18:28:40 1.6 @@ -21,8 +21,8 @@ */ -#ifndef ClusterStatus_h -#define ClusterStatus_h +#ifndef __CONGA_CLUSTERSTATUS_H +#define __CONGA_CLUSTERSTATUS_H #include "XML.h" @@ -35,4 +35,4 @@ bool purge_conf = false); }; -#endif // ClusterStatus_h +#endif --- conga/ricci/modules/cluster/Clusvcadm.h 2007/08/23 15:30:27 1.4 +++ conga/ricci/modules/cluster/Clusvcadm.h 2007/09/04 18:28:40 1.5 @@ -20,8 +20,8 @@ * Author: Stanko Kupcevic */ -#ifndef Clusvcadm_h -#define Clusvcadm_h +#ifndef __CONGA_CLUSVCADM_H +#define __CONGA_CLUSVCADM_H #include "String.h" @@ -34,4 +34,4 @@ static void stop(const String& servicename); }; -#endif // Clusvcadm_h +#endif --- conga/ricci/modules/cluster/Fence.h 2007/08/23 15:30:27 1.3 +++ conga/ricci/modules/cluster/Fence.h 2007/09/04 18:28:40 1.4 @@ -21,16 +21,15 @@ */ -#ifndef Fence_h -#define Fence_h +#ifndef __CONGA_FENCE_H +#define __CONGA_FENCE_H #include "String.h" - class Fence { public: static void fence_node(const String& nodename); }; -#endif // Fence_h +#endif --- conga/ricci/modules/cluster/NoServiceManager.h 2007/08/23 15:30:27 1.2 +++ conga/ricci/modules/cluster/NoServiceManager.h 2007/09/04 18:28:40 1.3 @@ -21,8 +21,8 @@ */ -#ifndef NoServiceManager_h -#define NoServiceManager_h +#ifndef __CONGA_NOSERVICEMANAGER_H +#define __CONGA_NOSERVICEMANAGER_H #include "Except.h" @@ -34,4 +34,4 @@ virtual ~NoServiceManager() {} }; -#endif // NoServiceManager_h +#endif --- conga/ricci/modules/cluster/clumon/src/common/Cluster.cpp 2006/08/15 
00:12:32 1.6 +++ conga/ricci/modules/cluster/clumon/src/common/Cluster.cpp 2007/09/04 18:28:40 1.7 @@ -1,5 +1,5 @@ /* - Copyright Red Hat, Inc. 2005 + Copyright Red Hat, Inc. 2005-2007 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -13,7 +13,7 @@ You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, + Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* @@ -29,360 +29,400 @@ using namespace ClusterMonitoring; -Cluster::Cluster(const String &name, - const String &alias, - const String &cluster_version, - unsigned int minQuorum) : - _name(name), - _alias(alias), - _cl_version(cluster_version), - _minQuorum(minQuorum) +Cluster::Cluster( const String &name, + const String &alias, + const String &cluster_version, + unsigned int minQuorum) : + _name(name), + _alias(alias), + _cl_version(cluster_version), + _minQuorum(minQuorum) { - // add no-node node - addNode("", 0, false, false, ""); + // add no-node node + addNode("", 0, false, false, ""); } Cluster::~Cluster(void) {} - - -String +String Cluster::name() { - return _name; + return _name; } -String +String Cluster::alias() { - return _alias; + return _alias; } String Cluster::version() { - return _cl_version; + return _cl_version; } -unsigned int +unsigned int Cluster::votes() { - unsigned int votes = 0; - for (map >::iterator iter = _nodes.begin(); - iter != _nodes.end(); - iter++) { - Node& node = *(iter->second); - if (node.clustered()) - votes += node.votes(); - } - return votes; + unsigned int votes = 0; + for (map >::iterator + iter = _nodes.begin() ; + iter != _nodes.end() ; + iter++) + { + Node& node = *(iter->second); + if (node.clustered()) + votes += node.votes(); + } + return votes; } -unsigned int +unsigned int Cluster::minQuorum() { - if 
(_minQuorum != 0) - return _minQuorum; - else { - unsigned int votes = 0; - list > nodes = this->nodes(); - for (list >::iterator iter = nodes.begin(); - iter != nodes.end(); - iter++) - votes += (*iter)->votes(); - return votes/2 + 1; - } -} + if (_minQuorum != 0) + return _minQuorum; + else { + unsigned int votes = 0; + list > nodes = this->nodes(); + for (list >::iterator + iter = nodes.begin() ; + iter != nodes.end() ; + iter++) + { + votes += (*iter)->votes(); + } -bool -Cluster::quorate() -{ - return votes() >= minQuorum(); + return votes / 2 + 1; + } } - -counting_auto_ptr -Cluster::addNode(const String& name, - unsigned int votes, - bool online, - bool clustered, - const String& uptime) +bool +Cluster::quorate() { - counting_auto_ptr node(new Node(name, _name, votes, online, clustered, uptime)); - if (_nodes.insert(pair >(name, node)).second) - return node; - else - // already present - return _nodes[name]; + return votes() >= minQuorum(); } -counting_auto_ptr -Cluster::addService(const String& name, - const String& nodeName, - bool failed, - bool autostart, - const String& time_since_transition) -{ - map >::iterator iter = _nodes.find(nodeName); - if (iter == _nodes.end()) - throw String("Cluster::addService(): add node first"); - return iter->second->addService(name, failed, autostart, time_since_transition); +counting_auto_ptr +Cluster::addNode( const String& name, + unsigned int votes, + bool online, + bool clustered, + const String& uptime) +{ + counting_auto_ptr node(new Node(name, _name, votes, online, clustered, uptime)); + if (_nodes.insert(pair >(name, node)).second) + return node; + else { + // already present + return _nodes[name]; + } +} + +counting_auto_ptr +Cluster::addService(const String& name, + const String& nodeName, + bool failed, + bool autostart, + const String& time_since_transition) +{ + map >::iterator iter = _nodes.find(nodeName); + if (iter == _nodes.end()) + throw String("Cluster::addService(): add node first"); + return 
iter->second->addService(name, failed, autostart, time_since_transition); } - -list > +list > Cluster::nodes() { - list > ret; - - for (map >::iterator iter = _nodes.begin(); - iter != _nodes.end(); - iter++) { - counting_auto_ptr& node = iter->second; - if (!node->name().empty()) - ret.push_back(node); - } - return ret; + list > ret; + + for (map >::iterator + iter = _nodes.begin() ; + iter != _nodes.end() ; + iter++) + { + counting_auto_ptr& node = iter->second; + if (!node->name().empty()) + ret.push_back(node); + } + return ret; } -std::list > +std::list > Cluster::clusteredNodes() { - list > ret; - - for (map >::iterator iter = _nodes.begin(); - iter != _nodes.end(); - iter++) { - counting_auto_ptr& node = iter->second; - if (node->name().size() && node->clustered()) - ret.push_back(node); - } - return ret; + list > ret; + + for (map >::iterator + iter = _nodes.begin() ; + iter != _nodes.end() ; + iter++) + { + counting_auto_ptr& node = iter->second; + if (node->name().size() && node->clustered()) + ret.push_back(node); + } + return ret; } -list > +list > Cluster::unclusteredNodes() { - list > ret; - - for (map >::iterator iter = _nodes.begin(); - iter != _nodes.end(); - iter++) { - counting_auto_ptr& node = iter->second; - if (node->name().size() && !node->clustered()) - ret.push_back(node); - } - return ret; -} + list > ret; + for (map >::iterator + iter = _nodes.begin() ; + iter != _nodes.end() ; + iter++) + { + counting_auto_ptr& node = iter->second; + if (node->name().size() && !node->clustered()) + ret.push_back(node); + } + return ret; +} -list > +list > Cluster::services() { - list > ret; - - for (map >::iterator iter = _nodes.begin(); - iter != _nodes.end(); - iter++) { - list > services = iter->second->services(); - ret.insert(ret.end(), services.begin(), services.end()); - } - return ret; + list > ret; + + for (map >::iterator + iter = _nodes.begin() ; + iter != _nodes.end() ; + iter++) + { + list > services = iter->second->services(); + 
ret.insert(ret.end(), services.begin(), services.end()); + } + return ret; } -list > +list > Cluster::runningServices() { - list > ret; - - list > nodes = this->nodes(); - for (list >::iterator iter = nodes.begin(); - iter != nodes.end(); - iter++) { - counting_auto_ptr& node = *iter; - list > services = node->services(); - if (node->name().size()) - ret.insert(ret.end(), services.begin(), services.end()); - } - return ret; + list > ret; + + list > nodes = this->nodes(); + for (list >::iterator + iter = nodes.begin() ; + iter != nodes.end() ; + iter++) + { + counting_auto_ptr& node = *iter; + list > services = node->services(); + if (node->name().size()) + ret.insert(ret.end(), services.begin(), services.end()); + } + return ret; } -std::list > +std::list > Cluster::stoppedServices() { - list > ret; - list > services = _nodes.find("")->second->services(); - for (list >::iterator iter = services.begin(); - iter != services.end(); - iter++) { - counting_auto_ptr& service = *iter; - if (!service->running() && !service->failed()) - ret.push_back(service); - } - return ret; + list > ret; + list > services = + _nodes.find("")->second->services(); + + for (list >::iterator + iter = services.begin() ; + iter != services.end() ; + iter++) + { + counting_auto_ptr& service = *iter; + if (!service->running() && !service->failed()) + ret.push_back(service); + } + return ret; } -std::list > +std::list > Cluster::failedServices() { - list > ret; - list > services = _nodes.find("")->second->services(); - for (list >::iterator iter = services.begin(); - iter != services.end(); - iter++) { - counting_auto_ptr& service = *iter; - if (service->failed()) - ret.push_back(service); - } - return ret; + list > ret; + list > services = + _nodes.find("")->second->services(); + + for (list >::iterator + iter = services.begin() ; + iter != services.end() ; + iter++) + { + counting_auto_ptr& service = *iter; + if (service->failed()) + ret.push_back(service); + } + return ret; } - - - -String 
+String ClusterMonitoring::cluster2xml(Cluster& cluster) { - char buff[1024]; - - // cluster - XMLObject clu("cluster"); - clu.set_attr("name", cluster.name()); - clu.set_attr("alias", cluster.alias()); - clu.set_attr("cluster_version", cluster.version()); - sprintf(buff, "%u", cluster.votes()); - clu.set_attr("votes", buff); - sprintf(buff, "%u", cluster.minQuorum()); - clu.set_attr("minQuorum", buff); - clu.set_attr("quorate", (cluster.quorate()) ? "true" : "false"); - - // nodes - std::list > nodes = cluster.nodes(); - for (std::list >::iterator iter = nodes.begin(); - iter != nodes.end(); - iter++) { - Node& node = **iter; - XMLObject n("node"); - n.set_attr("name", node.name()); - sprintf(buff, "%u", node.votes()); - n.set_attr("votes", buff); - n.set_attr("online", (node.online()) ? "true" : "false"); - n.set_attr("clustered", (node.clustered()) ? "true" : "false"); - n.set_attr("uptime", node.uptime()); - clu.add_child(n); - } - - // services - std::list > services = cluster.services(); - for (std::list >::iterator iter = services.begin(); - iter != services.end(); - iter++) { - Service& service = **iter; - XMLObject s("service"); - s.set_attr("name", service.name()); - s.set_attr("running", (service.running()) ? "true" : "false"); - if (service.running()) - s.set_attr("nodename", service.nodename()); - else - s.set_attr("nodename", ""); - s.set_attr("failed", (service.failed()) ? "true" : "false"); - s.set_attr("autostart", (service.autostart()) ? 
"true" : "false"); - s.set_attr("time_since_transition", service.time_since_transition()); - clu.add_child(s); - } - - return generateXML(clu); + char buff[64]; + + // cluster + XMLObject clu("cluster"); + clu.set_attr("name", cluster.name()); + clu.set_attr("alias", cluster.alias()); + clu.set_attr("cluster_version", cluster.version()); + + snprintf(buff, sizeof(buff), "%u", cluster.votes()); + clu.set_attr("votes", buff); + + snprintf(buff, sizeof(buff), "%u", cluster.minQuorum()); + clu.set_attr("minQuorum", buff); + + clu.set_attr("quorate", (cluster.quorate()) ? "true" : "false"); + + // nodes + std::list > nodes = cluster.nodes(); + for (std::list >::iterator + iter = nodes.begin() ; + iter != nodes.end() ; + iter++) + { + Node& node = **iter; + XMLObject n("node"); + n.set_attr("name", node.name()); + + snprintf(buff, sizeof(buff), "%u", node.votes()); + n.set_attr("votes", buff); + + n.set_attr("online", (node.online()) ? "true" : "false"); + n.set_attr("clustered", (node.clustered()) ? "true" : "false"); + n.set_attr("uptime", node.uptime()); + clu.add_child(n); + } + + // services + std::list > services = cluster.services(); + for (std::list >::iterator + iter = services.begin() ; + iter != services.end() ; + iter++) + { + Service& service = **iter; + XMLObject s("service"); + s.set_attr("name", service.name()); + s.set_attr("running", (service.running()) ? "true" : "false"); + if (service.running()) + s.set_attr("nodename", service.nodename()); + else + s.set_attr("nodename", ""); + s.set_attr("failed", (service.failed()) ? "true" : "false"); + s.set_attr("autostart", (service.autostart()) ? 
"true" : "false"); + s.set_attr("time_since_transition", service.time_since_transition()); + clu.add_child(s); + } + + return generateXML(clu); } -counting_auto_ptr +counting_auto_ptr ClusterMonitoring::xml2cluster(const String& xml) { - XMLObject clu = parseXML(xml); - if (clu.tag() != "cluster") - throw String("xml2cluster(): invalid xml"); - - // cluster - String name = clu.get_attr("name"); - if (name.empty()) - throw String("xml2cluster(): missing cluster name"); - unsigned int minQuorum = 0; - if (sscanf(clu.get_attr("minQuorum").c_str(), "%u", &minQuorum) != 1) - throw String("xml2cluster(): invalid value for cluster's minQuorum"); - String alias = clu.get_attr("alias"); - String cl_version = clu.get_attr("cluster_version"); - counting_auto_ptr cluster(new Cluster(name, - alias, - cl_version, - minQuorum)); - - // nodes - for (list::const_iterator iter = clu.children().begin(); - iter != clu.children().end(); - iter++) { - const XMLObject& obj = *iter; - if (obj.tag() == "node") { - // name - String node_name = obj.get_attr("name"); - if (node_name.empty()) - throw String("xml2cluster(): node missing 'name' attr"); - // votes - unsigned int votes; - if (sscanf(obj.get_attr("votes").c_str(), "%u", &votes) != 1) - throw String("xml2cluster(): invalid value for node's votes"); - // online - String online_str = obj.get_attr("online"); - bool online = online_str == "true"; - if (online_str.empty()) - throw String("xml2cluster(): node missing 'online' attr"); - // clustered - String clustered_str = obj.get_attr("clustered"); - bool clustered = clustered_str == "true"; - if (clustered_str.empty()) - throw String("xml2cluster(): node missing 'clustered' attr"); - // uptime - String uptime = obj.get_attr("uptime"); - // add node to cluster - cluster->addNode(node_name, votes, online, clustered, uptime); - } - } - - // services - for (list::const_iterator iter = clu.children().begin(); - iter != clu.children().end(); - iter++) { - const XMLObject& obj = *iter; - if 
(obj.tag() == "service") { - // name - String service_name = obj.get_attr("name"); - if (service_name.empty()) - throw String("xml2cluster(): service missing 'name' attr"); - // running - String running_str = obj.get_attr("running"); - bool running = running_str == "true"; - if (running_str.empty()) - throw String("xml2cluster(): service missing 'running' attr"); - // nodename - String nodename = obj.get_attr("nodename"); - if (running) - if (nodename.empty()) - throw String("xml2cluster(): running service missing 'nodename' attr"); - // failed - String failed_str = obj.get_attr("failed"); - bool failed = failed_str == "true"; - if (failed_str.empty()) - throw String("xml2cluster(): service missing 'failed' attr"); - // autostart - String autostart_str = obj.get_attr("autostart"); - bool autostart = autostart_str == "true"; - if (autostart_str.empty()) - throw String("xml2cluster(): service missing 'autostart' attr"); - // time since last transition - String time_since_transition = obj.get_attr("time_since_transition"); - // add service to cluster - cluster->addService(service_name, nodename, failed, autostart, time_since_transition); - } - } - - return cluster; + XMLObject clu = parseXML(xml); + if (clu.tag() != "cluster") + throw String("xml2cluster(): invalid xml"); + + // cluster + String name = clu.get_attr("name"); + if (name.empty()) + throw String("xml2cluster(): missing cluster name"); + + unsigned int minQuorum = 0; + if (sscanf(clu.get_attr("minQuorum").c_str(), "%u", &minQuorum) != 1) + throw String("xml2cluster(): invalid value for cluster's minQuorum"); + + String alias = clu.get_attr("alias"); + String cl_version = clu.get_attr("cluster_version"); + + counting_auto_ptr cluster(new Cluster(name, alias, cl_version, minQuorum)); + + // nodes + for (list::const_iterator + iter = clu.children().begin() ; + iter != clu.children().end() ; + iter++) + { + const XMLObject& obj = *iter; + if (obj.tag() == "node") { + // name + String node_name = 
obj.get_attr("name"); + if (node_name.empty()) + throw String("xml2cluster(): node missing 'name' attr"); + + // votes + unsigned int votes; + if (sscanf(obj.get_attr("votes").c_str(), "%u", &votes) != 1) + throw String("xml2cluster(): invalid value for node's votes"); + + // online + String online_str = obj.get_attr("online"); + bool online = online_str == "true"; + if (online_str.empty()) + throw String("xml2cluster(): node missing 'online' attr"); + + // clustered + String clustered_str = obj.get_attr("clustered"); + bool clustered = clustered_str == "true"; + if (clustered_str.empty()) + throw String("xml2cluster(): node missing 'clustered' attr"); + + // uptime + String uptime = obj.get_attr("uptime"); + + // add node to cluster + cluster->addNode(node_name, votes, online, clustered, uptime); + } + } + + // services + for (list::const_iterator + iter = clu.children().begin() ; + iter != clu.children().end() ; + iter++) + { + const XMLObject& obj = *iter; + if (obj.tag() == "service") { + // name + String service_name = obj.get_attr("name"); + if (service_name.empty()) + throw String("xml2cluster(): service missing 'name' attr"); + + // running + String running_str = obj.get_attr("running"); + bool running = running_str == "true"; + if (running_str.empty()) + throw String("xml2cluster(): service missing 'running' attr"); + + // nodename + String nodename = obj.get_attr("nodename"); + if (running && nodename.empty()) + throw String("xml2cluster(): service missing 'nodename' attr"); + + // failed + String failed_str = obj.get_attr("failed"); + bool failed = failed_str == "true"; + if (failed_str.empty()) + throw String("xml2cluster(): service missing 'failed' attr"); + + // autostart + String autostart_str = obj.get_attr("autostart"); + bool autostart = autostart_str == "true"; + if (autostart_str.empty()) + throw String("xml2cluster(): service missing 'autostart' attr"); + + // time since last transition + String tst = obj.get_attr("time_since_transition"); + + 
// add service to cluster + cluster->addService(service_name, nodename, failed, autostart, tst); + } + } + + return cluster; } --- conga/ricci/modules/cluster/clumon/src/common/ClusterMonitor.cpp 2006/08/10 22:53:08 1.2 +++ conga/ricci/modules/cluster/clumon/src/common/ClusterMonitor.cpp 2007/09/04 18:28:40 1.3 @@ -1,5 +1,5 @@ /* - Copyright Red Hat, Inc. 2005 + Copyright Red Hat, Inc. 2005-2007 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -32,53 +32,56 @@ using namespace ClusterMonitoring; -ClusterMonitor::ClusterMonitor(const String& socket_path) : - _sock_path(socket_path) +ClusterMonitor::ClusterMonitor(const String& socket_path) : + _sock_path(socket_path) {} ClusterMonitor::~ClusterMonitor() {} - counting_auto_ptr ClusterMonitor::get_cluster() { - try { - ClientSocket sock(_sock_path); - - if(sock.send("GET").size()) - throw int(); - - String xml; - unsigned int timeout = 1000; - while (timeout > 0) { - struct pollfd poll_data; - poll_data.fd = sock.get_sock(); - poll_data.events = POLLIN; - poll_data.revents = 0; - - unsigned int time_start = time_mil(); - int ret = poll(&poll_data, 1, timeout); - timeout -= (time_mil() - time_start); - if (ret == 0) - continue; - else if (ret == -1) { - if (errno == EINTR) - continue; - else - throw String("get_cluster(): poll() error"); - } - if (poll_data.revents & POLLIN) { - xml += sock.recv(); - if (xml.find("\n\n") != xml.npos) - break; - continue; - } - if (poll_data.revents & (POLLERR | POLLHUP | POLLNVAL)) - throw String("get_cluster(): socket error"); - } - return xml2cluster(xml); - } catch ( ... 
) { - return counting_auto_ptr(); - } + try { + ClientSocket sock(_sock_path); + + if(sock.send("GET").size()) + throw int(); + + String xml; + unsigned int timeout = 1000; + while (timeout > 0) { + struct pollfd poll_data; + poll_data.fd = sock.get_sock(); + poll_data.events = POLLIN; + poll_data.revents = 0; + + unsigned int time_start = time_mil(); + int ret = poll(&poll_data, 1, timeout); + timeout -= (time_mil() - time_start); + if (ret == 0) + continue; + else if (ret == -1) { + if (errno == EINTR) + continue; + else { + throw String("get_cluster(): poll() error") + + String(strerror(errno)); + } + } + + if (poll_data.revents & POLLIN) { + xml += sock.recv(); + if (xml.find("\n\n") != xml.npos) + break; + continue; + } + + if (poll_data.revents & (POLLERR | POLLHUP | POLLNVAL)) + throw String("get_cluster(): socket error"); + } + return xml2cluster(xml); + } catch ( ... ) { + return counting_auto_ptr(); + } } --- conga/ricci/modules/cluster/clumon/src/common/Node.cpp 2006/08/10 22:53:08 1.4 +++ conga/ricci/modules/cluster/clumon/src/common/Node.cpp 2007/09/04 18:28:40 1.5 @@ -1,5 +1,5 @@ /* - Copyright Red Hat, Inc. 2005 + Copyright Red Hat, Inc. 2005-2007 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -13,7 +13,7 @@ You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, + Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ /* @@ -26,81 +26,83 @@ using namespace std; using namespace ClusterMonitoring; - -Node::Node(const String& name, - const String& clustername, - unsigned int votes, - bool online, - bool clustered, - const String& uptime) : - _name(name), - _clustername(clustername), - _votes(votes), - _online(online), - _clustered(clustered), - _uptime(uptime) +Node::Node( const String& name, + const String& clustername, + unsigned int votes, + bool online, + bool clustered, + const String& uptime) : + _name(name), + _clustername(clustername), + _votes(votes), + _online(online), + _clustered(clustered), + _uptime(uptime) {} Node::~Node(void) {} -String +String Node::name() const { - return _name; + return _name; } -String +String Node::clustername() const { - return _clustername; + return _clustername; } -unsigned int +unsigned int Node::votes() const { - return _votes; + return _votes; } -bool +bool Node::online() const { - return _online; + return _online; } -bool +bool Node::clustered() const { - return _clustered; + return _clustered; } String Node::uptime() const { - return _uptime; + return _uptime; } counting_auto_ptr -Node::addService(const String& name, - bool failed, - bool autostart, - const String& time_since_transition) -{ - counting_auto_ptr service(new Service(name, _clustername, *this, failed, autostart, time_since_transition)); - _services.insert(pair >(name, service)); - return service; +Node::addService( const String& name, + bool failed, + bool autostart, + const String& time_since_transition) +{ + counting_auto_ptr service(new Service(name, _clustername, *this, failed, autostart, time_since_transition)); + _services.insert(pair >(name, service)); + return service; } list > Node::services() { - list > ret; - - for (map >::iterator iter = _services.begin(); - iter != _services.end(); - iter++) - ret.push_back(iter->second); - return ret; + list > ret; + + for (map >::iterator + iter = _services.begin() ; + iter != _services.end() ; + iter++) + { + 
ret.push_back(iter->second); + } + return ret; } --- conga/ricci/modules/cluster/clumon/src/common/Service.cpp 2006/08/10 22:53:08 1.3 +++ conga/ricci/modules/cluster/clumon/src/common/Service.cpp 2007/09/04 18:28:40 1.4 @@ -1,5 +1,5 @@ /* - Copyright Red Hat, Inc. 2005 + Copyright Red Hat, Inc. 2005-2007 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -13,7 +13,7 @@ You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, + Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* @@ -33,57 +33,56 @@ const Node& node, bool failed, bool autostart, - const String& time_since_transition) : - _name(name), - _clustername(clustername), - _nodename(node.name()), - _autostart(autostart), - _failed(failed), - _time_since_transition(time_since_transition) + const String& time_since_transition) : + _name(name), + _clustername(clustername), + _nodename(node.name()), + _autostart(autostart), + _failed(failed), + _time_since_transition(time_since_transition) {} Service::~Service(void) {} - String Service::name() const { - return _name; + return _name; } String Service::clustername() const { - return _clustername; + return _clustername; } -bool +bool Service::running() const { - return _nodename.size(); + return _nodename.size(); } String Service::nodename() const { - return _nodename; + return _nodename; } -bool +bool Service::failed() const { - return _failed; + return _failed; } -bool +bool Service::autostart() const { - return _autostart; + return _autostart; } String Service::time_since_transition() const { - return _time_since_transition; + return _time_since_transition; } --- conga/ricci/modules/cluster/clumon/src/daemon/Communicator.cpp 2006/10/14 18:00:02 1.4 +++ 
conga/ricci/modules/cluster/clumon/src/daemon/Communicator.cpp 2007/09/04 18:28:40 1.5 @@ -1,5 +1,5 @@ /* - Copyright Red Hat, Inc. 2005 + Copyright Red Hat, Inc. 2005-2007 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -13,7 +13,7 @@ You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, + Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* @@ -35,12 +35,10 @@ #include #include - using namespace ClusterMonitoring; using namespace std; - CommDP::CommDP() {} @@ -48,211 +46,234 @@ {} - -Communicator::Communicator(unsigned short port, - CommDP& delivery_point) : - _port(port), - _serv_sock(_port), - _delivery_point(delivery_point) +Communicator::Communicator(unsigned short port, CommDP& delivery_point) : + _port(port), + _serv_sock(_port), + _delivery_point(delivery_point) { - _serv_sock.nonblocking(true); - _connect_time = time_sec(); - _rand_state = time_mil(); - log(String("Communicator created, port ") + _port, LogCommunicator); + _serv_sock.nonblocking(true); + _connect_time = time_sec(); + _rand_state = time_mil(); + log(String("Communicator created, port ") + _port, LogCommunicator); } Communicator::~Communicator() { - stop(); - log("Communicator deleted", LogCommunicator); + stop(); + log("Communicator deleted", LogCommunicator); } - -void + +void Communicator::send(const String& msg) { - MutexLocker l(_mutex); - _out_q.push_back(msg); + MutexLocker l(_mutex); + _out_q.push_back(msg); } -void -Communicator::update_peers(const String& self, - const std::vector& hosts) +void +Communicator::update_peers(const String& self, const std::vector& hosts) { - MutexLocker l(_mutex); - _my_hostname = self; - _peer_hostnames.clear(); - for (unsigned int i=0; i names; - vector que; - String my_hostname; - { - 
MutexLocker l(_mutex); - my_hostname = _my_hostname; - for (unsigned int i=0; i<_peer_hostnames.size(); i++) - names.push_back(_peer_hostnames[i]); - for (unsigned int i=0; i<_out_q.size(); i++) - que.push_back(_out_q[i]); - _out_q.clear(); - } - - // remove non-peers - vector remove_us; - for (map::iterator iter = _peers.begin(); - iter != _peers.end(); - iter++) { - const String& name = iter->first; - if (find(names.begin(), names.end(), name) == names.end()) - remove_us.push_back(name); - } - for (vector::iterator iter = remove_us.begin(); - iter != remove_us.end(); - iter++) { - log("dropping connection with " + *iter, LogCommunicator); - _peers.erase(*iter); - } - - // connect to peers - if (time_to_connect()) { - for (vector::iterator iter = names.begin(); - iter != names.end(); - iter ++) { - const String& name = *iter; - if (_peers.find(name) == _peers.end()) - try { - _peers.insert(pair(name, Peer(name, _port))); - log("connected to " + name + ", socket " + _peers[name].get_sock_fd(), LogCommunicator); - } catch ( ... 
) {} - } - } - - // buffer msgs - for (vector::iterator iter_q = que.begin(); - iter_q != que.end(); - iter_q++) { - String& msg = *iter_q; - _delivery_point.msg_arrived(my_hostname, msg); - for (map::iterator iter_p = _peers.begin(); - iter_p != _peers.end(); - iter_p++) - iter_p->second.append(msg); - } - - serve_sockets(names); - - } // while(!shouldStop()) + while (!shouldStop()) { + vector names; + vector que; + String my_hostname; + + { + MutexLocker l(_mutex); + my_hostname = _my_hostname; + + for (unsigned int i = 0 ; i < _peer_hostnames.size() ; i++) + names.push_back(_peer_hostnames[i]); + + for (unsigned int i = 0 ; i < _out_q.size() ; i++) + que.push_back(_out_q[i]); + _out_q.clear(); + } + + // remove non-peers + vector remove_us; + for (map::iterator + iter = _peers.begin() ; + iter != _peers.end() ; + iter++) + { + const String& name = iter->first; + if (find(names.begin(), names.end(), name) == names.end()) + remove_us.push_back(name); + } + + for (vector::iterator + iter = remove_us.begin() ; + iter != remove_us.end() ; + iter++) + { + log("dropping connection with " + *iter, LogCommunicator); + _peers.erase(*iter); + } + + // connect to peers + if (time_to_connect()) { + for (vector::iterator + iter = names.begin() ; + iter != names.end() ; + iter++) + { + const String& name = *iter; + if (_peers.find(name) == _peers.end()) { + try { + _peers.insert(pair(name, Peer(name, _port))); + log("connected to " + name + ", socket " + + _peers[name].get_sock_fd(), LogCommunicator); + } catch ( ... 
) {} + } + } + } + + // buffer msgs + for (vector::iterator iter_q = que.begin(); + iter_q != que.end(); + iter_q++) + { + String& msg = *iter_q; + _delivery_point.msg_arrived(my_hostname, msg); + for (map::iterator iter_p = _peers.begin(); + iter_p != _peers.end(); + iter_p++) + { + iter_p->second.append(msg); + } + } + + serve_sockets(names); + } } -void +void Communicator::serve_sockets(vector& names) { - map fd_peer; - for (map::iterator iter = _peers.begin(); - iter != _peers.end(); - iter++) { - Peer& peer = iter->second; - fd_peer.insert(pair(peer.get_sock_fd(), peer)); - } - unsigned int socks_num = fd_peer.size() + 1; - - // prepare poll structs - array_auto_ptr poll_data(new poll_fd[socks_num]); - poll_data[0].fd = _serv_sock.get_sock(); - poll_data[0].events = POLLIN; - poll_data[0].revents = 0; - map::iterator iter = fd_peer.begin(); - for (unsigned int i=1; ifirst; - poll_data[i].events = POLLIN; - if ( ! iter->second.outq_empty()) - poll_data[i].events = POLLOUT; - poll_data[i].revents = 0; - } - - // wait for events - int ret = poll(poll_data.get(), socks_num, 500); - if (ret == 0) - return; - else if (ret == -1) { - if (errno == EINTR) - return; - else - throw String("Communicator::run(): poll() error"); - } - - // process events - for (unsigned int i=0; i::iterator iter = names.begin(); - iter != names.end(); - iter++) { - String& name = *iter; - if (sock.connected_to(name)) - hostname = name; - } - if (hostname.size()) { - _peers.insert(pair(hostname, Peer(hostname, sock))); - log("accepted connection from " + hostname + ", socket " + sock.get_sock(), LogCommunicator); - } - } catch ( ... ) {} - } - if (poll_info.revents & (POLLERR | POLLHUP | POLLNVAL)) - throw String("Communicator::run(): server socket error????"); - } - - // client socket - else { - Peer& peer = fd_peer[poll_info.fd]; - if (poll_info.revents & POLLIN) { - vector msgs; - try { - msgs = peer.receive(); - } catch ( ... 
) { - log("error receiving data from " + peer.hostname(), LogCommunicator); - _peers.erase(peer.hostname()); - continue; + map fd_peer; + for (map::iterator + iter = _peers.begin(); + iter != _peers.end(); + iter++) + { + Peer& peer = iter->second; + fd_peer.insert(pair(peer.get_sock_fd(), peer)); + } + + unsigned int socks_num = fd_peer.size() + 1; + + // prepare poll structs + array_auto_ptr poll_data(new poll_fd[socks_num]); + poll_data[0].fd = _serv_sock.get_sock(); + poll_data[0].events = POLLIN; + poll_data[0].revents = 0; + map::iterator iter = fd_peer.begin(); + + for (unsigned int i = 1 ; i < socks_num ; i++, iter++) { + poll_data[i].fd = iter->first; + poll_data[i].events = POLLIN; + if (!iter->second.outq_empty()) + poll_data[i].events = POLLOUT; + poll_data[i].revents = 0; } - for (unsigned int i=0; i::iterator + iter = names.begin() ; + iter != names.end() ; + iter++) + { + String& name = *iter; + if (sock.connected_to(name)) + hostname = name; + } + + if (hostname.size()) { + _peers.insert(pair(hostname, Peer(hostname, sock))); + log("accepted connection from " + hostname + ", socket " + + sock.get_sock(), LogCommunicator); + } + } catch ( ... ) {} + } + if (poll_info.revents & (POLLERR | POLLHUP | POLLNVAL)) + throw String("Communicator::run(): server socket error"); + } else { + // client socket + Peer& peer = fd_peer[poll_info.fd]; + if (poll_info.revents & POLLIN) { + vector msgs; + try { + msgs = peer.receive(); + } catch ( ... ) { + log("error receiving data from " + peer.hostname(), + LogCommunicator); + _peers.erase(peer.hostname()); + continue; + } + for (unsigned int i = 0 ; i < msgs.size() ; i++) + _delivery_point.msg_arrived(peer.hostname(), msgs[i]); + continue; + } + if (poll_info.revents & (POLLERR | POLLHUP | POLLNVAL)) { + _peers.erase(peer.hostname()); + continue; + } + if (poll_info.revents & POLLOUT) { + try { + peer.send(); + } catch ( ... 
) { + log("error sending data to " + peer.hostname(), + LogCommunicator); + _peers.erase(peer.hostname()); + continue; + } + } + } } - } - } // client socket - } // process events } bool Communicator::time_to_connect() { - if (time_sec() > _connect_time) { - _connect_time = time_sec() + rand_r(&_rand_state) % 15; - return true; - } - return false; + if (time_sec() > _connect_time) { + _connect_time = time_sec() + rand_r(&_rand_state) % 15; + return true; + } + return false; } --- conga/ricci/modules/cluster/clumon/src/daemon/Communicator.h 2006/08/10 22:53:08 1.3 +++ conga/ricci/modules/cluster/clumon/src/daemon/Communicator.h 2007/09/04 18:28:40 1.4 @@ -1,5 +1,5 @@ /* - Copyright Red Hat, Inc. 2005 + Copyright Red Hat, Inc. 2005-2007 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -13,7 +13,7 @@ You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, + Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ /* @@ -21,8 +21,8 @@ */ -#ifndef Communicator_h -#define Communicator_h +#ifndef __CONGA_CLUSTER_COMMUNICATOR_H +#define __CONGA_CLUSTER_COMMUNICATOR_H #include "Thread.h" #include "Mutex.h" @@ -34,59 +34,54 @@ #include -namespace ClusterMonitoring +namespace ClusterMonitoring { class CommDP { - public: - CommDP(); - virtual ~CommDP(); - - virtual void msg_arrived(const String& host, - const String& msg) = 0; + public: + CommDP(); + virtual ~CommDP(); + + virtual void msg_arrived(const String& host, const String& msg) = 0; }; class Communicator : public Thread { - public: - Communicator(unsigned short port, CommDP& delivery_point); - virtual ~Communicator(); - - void send(const String& msg); - void update_peers(const String& self, const std::vector& peers); - - private: - unsigned short _port; - ServerSocket _serv_sock; - - CommDP& _delivery_point; - - std::map _peers; - - Mutex _mutex; - String _my_hostname; - std::vector _out_q; - std::vector _peer_hostnames; - - void serve_sockets(std::vector&hostnames); - - bool time_to_connect(); - unsigned int _connect_time; - unsigned int _rand_state; - - - void run(); - - Communicator(const Communicator&); - Communicator& operator= (const Communicator&); + public: + Communicator(unsigned short port, CommDP& delivery_point); + virtual ~Communicator(); + + void send(const String& msg); + void update_peers(const String& self, const std::vector& peers); + + private: + unsigned short _port; + ServerSocket _serv_sock; + + CommDP& _delivery_point; + + std::map _peers; -}; // class Communicator + Mutex _mutex; + String _my_hostname; + std::vector _out_q; + std::vector _peer_hostnames; + void serve_sockets(std::vector&hostnames); + bool time_to_connect(); + unsigned int _connect_time; + unsigned int _rand_state; -}; // namespace ClusterMonitoring + void run(); + Communicator(const Communicator&); + Communicator& operator= (const Communicator&); +}; + + +}; -#endif // Communicator_h +#endif --- 
conga/ricci/modules/cluster/clumon/src/daemon/Monitor.cpp 2007/08/22 18:47:20 1.15 +++ conga/ricci/modules/cluster/clumon/src/daemon/Monitor.cpp 2007/09/04 18:28:40 1.16 @@ -1,5 +1,5 @@ /* - Copyright Red Hat, Inc. 2005 + Copyright Red Hat, Inc. 2005-2007 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -13,7 +13,7 @@ You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, + Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* @@ -37,781 +37,892 @@ using namespace ClusterMonitoring; using namespace std; - - -#define RG_STATE_STOPPED 110 /** Resource group is stopped */ -#define RG_STATE_STARTING 111 /** Resource is starting */ -#define RG_STATE_STARTED 112 /** Resource is started */ -#define RG_STATE_STOPPING 113 /** Resource is stopping */ -#define RG_STATE_FAILED 114 /** Resource has failed */ -#define RG_STATE_UNINITIALIZED 115 /** Thread not running yet */ -#define RG_STATE_CHECK 116 /** Checking status */ -#define RG_STATE_ERROR 117 /** Recoverable error */ -#define RG_STATE_RECOVER 118 /** Pending recovery */ -#define RG_STATE_DISABLED 119 /** Resource not allowd to run */ -#define RG_STATE_MIGRATE 120 /** Resource migrating */ - - - - -#define EXECUTE_TIMEOUT 3000 - -#define CCS_TOOL_PATH "/sbin/ccs_tool" - - -static XMLObject -merge_xmls(const XMLObject& what, const XMLObject& with); -static String -cluster_version(); -static String -get_cman_tool_path(); - - +#define RG_STATE_STOPPED 110 /** Resource group is stopped */ +#define RG_STATE_STARTING 111 /** Resource is starting */ +#define RG_STATE_STARTED 112 /** Resource is started */ +#define RG_STATE_STOPPING 113 /** Resource is stopping */ +#define RG_STATE_FAILED 114 /** Resource has failed */ +#define RG_STATE_UNINITIALIZED 115 /** Thread not 
running yet */ +#define RG_STATE_CHECK 116 /** Checking status */ +#define RG_STATE_ERROR 117 /** Recoverable error */ +#define RG_STATE_RECOVER 118 /** Pending recovery */ +#define RG_STATE_DISABLED 119 /** Resource not allowd to run */ +#define RG_STATE_MIGRATE 120 /** Resource migrating */ + +#define EXECUTE_TIMEOUT 3000 +#define CCS_TOOL_PATH "/sbin/ccs_tool" + +static XMLObject merge_xmls(const XMLObject& what, const XMLObject& with); +static String cluster_version(); +static String get_cman_tool_path(); Monitor::Monitor(unsigned short port) : - _comm(port, *this), - _cl_version(cluster_version()), - _cman_tool_path(get_cman_tool_path()), - _cman_locking(_cl_version == "5") + _comm(port, *this), + _cl_version(cluster_version()), + _cman_tool_path(get_cman_tool_path()), + _cman_locking(_cl_version == "5") { - log("Monitor created", LogMonitor); + log("Monitor created", LogMonitor); } Monitor::~Monitor() { - log("Stopping monitoring", LogMonitor); - stop(); - log("Monitoring stopped", LogMonitor); - log("Monitor deleted", LogMonitor); + log("Stopping monitoring", LogMonitor); + stop(); + log("Monitoring stopped", LogMonitor); + log("Monitor deleted", LogMonitor); } - -void +void Monitor::update_now() { - try { - MutexLocker l(_mutex); - String my_nodename, clustername, msg; - vector nodenames = get_local_info(my_nodename, - clustername, - msg); - msg_arrived(my_nodename, msg); - _cluster = merge_data(clustername); - } catch ( ... ) {} + try { + MutexLocker l(_mutex); + String my_nodename, clustername, msg; + vector nodenames= get_local_info(my_nodename, clustername, msg); + msg_arrived(my_nodename, msg); + _cluster = merge_data(clustername); + } catch ( ... 
) {} } void Monitor::run() { - log("Starting communicator", LogCommunicator); - _comm.start(); - while (!shouldStop()) { - - unsigned int time_beg = time_sec(); - - try { - // get local info - String my_nodename, clustername, msg; - vector nodenames = get_local_info(my_nodename, - clustername, - msg); - - // publish it - _comm.update_peers(my_nodename, nodenames); - _comm.send(msg + '\n'); - - // merge data from all nodes (removing stale entries) and update _cluster - { - MutexLocker l(_mutex); - _cluster = merge_data(clustername); - } - } catch ( ... ) { - MutexLocker l(_mutex); - _cluster = counting_auto_ptr(); - } - - String msg = String("monitoring iteration took ") + (time_sec() - time_beg) + " seconds"; - log(msg, LogTime); - - // wait some time - for (int i=0; i<10; i++) { - if (shouldStop()) - break; - struct pollfd nothing; - poll(&nothing, 0, 500); - } - } - log("Stopping communicator", LogCommunicator); - _comm.stop(); - log("Communicator stopped", LogCommunicator); -} + log("Starting communicator", LogCommunicator); + _comm.start(); + while (!shouldStop()) { + unsigned int time_beg = time_sec(); + + try { + // get local info + String my_nodename, clustername, msg; + vector nodenames = get_local_info(my_nodename, + clustername, msg); + + // publish it + _comm.update_peers(my_nodename, nodenames); + _comm.send(msg + '\n'); + + // merge data from all nodes (removing stale entries) + // and update _cluster + { + MutexLocker l(_mutex); + _cluster = merge_data(clustername); + } + } catch ( ...
) { + MutexLocker l(_mutex); + _cluster = counting_auto_ptr(); + } + + String msg = String("monitoring iteration took ") + + (time_sec() - time_beg) + " seconds"; + log(msg, LogTime); + + // wait some time + for (int i = 0 ; i < 10 ; i++) { + if (shouldStop()) + break; + struct pollfd nothing; + poll(&nothing, 0, 500); + } + } + log("Stopping communicator", LogCommunicator); + _comm.stop(); + log("Communicator stopped", LogCommunicator); +} String Monitor::request(const String& msg) { - MutexLocker l(_mutex); - if (msg == "GET") { - XMLObject def_xml("cluster"); - def_xml.set_attr("cluster_version", _cl_version); - String def(generateXML(def_xml) + "\n"); - if (_cluster.get() == NULL) - update_now(); - if (_cluster.get() == NULL) - return def; - try { - return cluster2xml(*_cluster) + "\n"; - } catch ( ... ) { - return def; - } - } - throw String("invalid request"); + MutexLocker l(_mutex); + + if (msg == "GET") { + XMLObject def_xml("cluster"); + def_xml.set_attr("cluster_version", _cl_version); + String def(generateXML(def_xml) + "\n"); + + if (_cluster.get() == NULL) + update_now(); + if (_cluster.get() == NULL) + return def; + + try { + return cluster2xml(*_cluster) + "\n"; + } catch ( ... ) { + return def; + } + } + throw String("invalid request"); } -void +void Monitor::msg_arrived(const String& hostname, const String& msg_in) { - String msg(msg_in); - // strip \n from the beggining - while (msg.size()) - if (msg[0] == '\n') - msg = msg.substr(1); - else - break; - - try { - XMLObject obj = parseXML(msg); - if (obj.tag() == "clumond") { - String type = obj.get_attr("type"); - if (type == "clusterupdate") - if (obj.children().size() == 1) { - const XMLObject& cluster = *(obj.children().begin()); - if (cluster.tag() == "cluster") { - MutexLocker l(_mutex); - pair data(time_sec(), cluster); - _cache[hostname] = data; - } - } - // TODO: other msgs - } - } catch ( ...
) {} -} - -vector -Monitor::get_local_info(String& nodename, - String& clustername, - String& msg) -{ - XMLObject cluster(parse_cluster_conf()); - - // nodes - vector nodes; - for (list::const_iterator iter = cluster.children().begin(); - iter != cluster.children().end(); - iter++) - if (iter->tag() == "node") - nodes.push_back(iter->get_attr("name")); - - // clustername - clustername = cluster.get_attr("name"); - - // nodename - nodename = this->nodename(nodes); - - try { - cluster.set_attr("minQuorum", probe_quorum()); - } catch ( ... ) {} - - cluster.set_attr("cluster_version", _cl_version); - - // insert current node info - const vector clustered_nodes = this->clustered_nodes(); - for (list::const_iterator iter = cluster.children().begin(); - iter != cluster.children().end(); - iter++) { - XMLObject& kid = (XMLObject&) *iter; - if (kid.tag() == "node") { - String name(kid.get_attr("name")); - if (name == nodename) { - // insert info about this node -> self - kid.set_attr("uptime", uptime()); - } - if (find(clustered_nodes.begin(), clustered_nodes.end(), name) != clustered_nodes.end()) { - kid.set_attr("online", "true"); - kid.set_attr("clustered", "true"); - } - } - } - - // insert current service info - const vector services_info = this->services_info(); - for (vector::const_iterator iter_i = services_info.begin(); - iter_i != services_info.end(); - iter_i++) { - const XMLObject& service = *iter_i; - for (list::const_iterator iter_c = cluster.children().begin(); - iter_c != cluster.children().end(); - iter_c++) { - XMLObject& kid = (XMLObject&) *iter_c; - if (kid.tag() == "service") - if (kid.get_attr("name") == service.get_attr("name")) { - for (map::const_iterator iter = service.attrs().begin(); - iter != service.attrs().end(); - iter++) - kid.set_attr(iter->first, iter->second); - } - } - } - - // ** return values ** - - // msg - XMLObject msg_xml("clumond"); - msg_xml.set_attr("type", "clusterupdate"); - msg_xml.add_child(cluster); - msg = 
generateXML(msg_xml); - - // return nodes - nodename - vector::iterator iter = find(nodes.begin(), nodes.end(), nodename); - if (iter != nodes.end()) - nodes.erase(iter); - return nodes; + String msg(msg_in); + // strip \n from the beggining + + while (msg.size()) { + if (msg[0] == '\n') + msg = msg.substr(1); + else + break; + } + + try { + XMLObject obj = parseXML(msg); + if (obj.tag() == "clumond") { + String type = obj.get_attr("type"); + if (type == "clusterupdate") { + if (obj.children().size() == 1) { + const XMLObject& cluster = *(obj.children().begin()); + if (cluster.tag() == "cluster") { + MutexLocker l(_mutex); + pair data(time_sec(), cluster); + _cache[hostname] = data; + } + } + // TODO: other msgs + } + } + } catch ( ... ) {} +} + +vector +Monitor::get_local_info(String& nodename, String& clustername, String& msg) +{ + XMLObject cluster(parse_cluster_conf()); + + // nodes + vector nodes; + + for (list::const_iterator + iter = cluster.children().begin() ; + iter != cluster.children().end() ; + iter++) + { + if (iter->tag() == "node") + nodes.push_back(iter->get_attr("name")); + } + + // clustername + clustername = cluster.get_attr("name"); + + // nodename + nodename = this->nodename(nodes); + + try { + cluster.set_attr("minQuorum", probe_quorum()); + } catch ( ... 
) {} + + cluster.set_attr("cluster_version", _cl_version); + + // insert current node info + const vector clustered_nodes = this->clustered_nodes(); + for (list::const_iterator + iter = cluster.children().begin() ; + iter != cluster.children().end() ; + iter++) + { + XMLObject& kid = (XMLObject&) *iter; + if (kid.tag() == "node") { + String name(kid.get_attr("name")); + if (name == nodename) { + // insert info about this node -> self + kid.set_attr("uptime", uptime()); + } + + if (find(clustered_nodes.begin(), clustered_nodes.end(), name) != + clustered_nodes.end()) + { + kid.set_attr("online", "true"); + kid.set_attr("clustered", "true"); + } + } + } + + // insert current service info + const vector services_info = this->services_info(); + for (vector::const_iterator + iter_i = services_info.begin() ; + iter_i != services_info.end() ; + iter_i++) + { + const XMLObject& service = *iter_i; + for (list::const_iterator + iter_c = cluster.children().begin() ; + iter_c != cluster.children().end() ; + iter_c++) + { + XMLObject& kid = (XMLObject&) *iter_c; + if (kid.tag() == "service") { + if (kid.get_attr("name") == service.get_attr("name")) { + for (map::const_iterator + iter = service.attrs().begin() ; + iter != service.attrs().end() ; + iter++) + { + kid.set_attr(iter->first, iter->second); + } + } + } + } + } + + // ** return values ** + + // msg + XMLObject msg_xml("clumond"); + msg_xml.set_attr("type", "clusterupdate"); + msg_xml.add_child(cluster); + msg = generateXML(msg_xml); + + // return nodes - nodename + vector::iterator iter = find(nodes.begin(), nodes.end(), nodename); + if (iter != nodes.end()) + nodes.erase(iter); + return nodes; } -XMLObject +XMLObject Monitor::parse_cluster_conf() { - XMLObject cluster_conf(readXML("/etc/cluster/cluster.conf")); - if (cluster_conf.tag() != "cluster" || - utils::strip(cluster_conf.get_attr("name")).empty() || - utils::strip(cluster_conf.get_attr("config_version")).empty()) - throw String("parse_cluster_conf(): invalid 
cluster.conf"); - - XMLObject cluster("cluster"); - for (map::const_iterator iter = cluster_conf.attrs().begin(); - iter != cluster_conf.attrs().end(); - iter++) - cluster.set_attr(iter->first, iter->second); - if (utils::strip(cluster.get_attr("alias")).empty()) - cluster.set_attr("alias", cluster.get_attr("name")); - - for (list::const_iterator iter = cluster_conf.children().begin(); - iter != cluster_conf.children().end(); - iter++) { - const XMLObject& kid = *iter; - if (kid.tag() == "clusternodes") { - for (list::const_iterator iter_n = kid.children().begin(); - iter_n != kid.children().end(); - iter_n++) { - const XMLObject& node_conf = *iter_n; - if (node_conf.tag() == "clusternode") { - XMLObject node("node"); - for (map::const_iterator iter_a = node_conf.attrs().begin(); - iter_a != node_conf.attrs().end(); - iter_a++) - node.set_attr(iter_a->first, iter_a->second); - cluster.add_child(node); - } - } - } else if (kid.tag() == "rm") { - for (list::const_iterator iter_s = kid.children().begin(); - iter_s != kid.children().end(); - iter_s++) { - const XMLObject& service_conf = *iter_s; - if (service_conf.tag() == "service" || - service_conf.tag() == "vm") { - XMLObject service("service"); - for (map::const_iterator iter_a = service_conf.attrs().begin(); - iter_a != service_conf.attrs().end(); - iter_a++) - service.set_attr(iter_a->first, iter_a->second); - if (service_conf.tag() == "vm") - service.set_attr("vm", "true"); - else - service.set_attr("vm", "false"); - cluster.add_child(service); - } - } - } else if (kid.tag() == "cman") { - cluster.set_attr("locking", "cman"); - if (kid.has_attr("expected_votes")) - cluster.set_attr("minQuorum", kid.get_attr("expected_votes")); - } else if (kid.tag() == "gulm") { - cluster.set_attr("locking", "gulm"); - } - } - - if (_cl_version == "5") - cluster.set_attr("locking", "cman"); - _cman_locking = (cluster.get_attr("locking") == "cman"); - - return cluster; + XMLObject 
cluster_conf(readXML("/etc/cluster/cluster.conf")); + if (cluster_conf.tag() != "cluster" || + utils::strip(cluster_conf.get_attr("name")).empty() || + utils::strip(cluster_conf.get_attr("config_version")).empty()) + { + throw String("parse_cluster_conf(): invalid cluster.conf"); + } + + XMLObject cluster("cluster"); + for (map::const_iterator + iter = cluster_conf.attrs().begin() ; + iter != cluster_conf.attrs().end() ; + iter++) + { + cluster.set_attr(iter->first, iter->second); + } + + if (utils::strip(cluster.get_attr("alias")).empty()) + cluster.set_attr("alias", cluster.get_attr("name")); + + for (list::const_iterator + iter = cluster_conf.children().begin() ; + iter != cluster_conf.children().end() ; + iter++) + { + const XMLObject& kid = *iter; + if (kid.tag() == "clusternodes") { + for (list::const_iterator + iter_n = kid.children().begin() ; + iter_n != kid.children().end() ; + iter_n++) + { + const XMLObject& node_conf = *iter_n; + if (node_conf.tag() == "clusternode") { + XMLObject node("node"); + for (map::const_iterator + iter_a = node_conf.attrs().begin() ; + iter_a != node_conf.attrs().end() ; + iter_a++) + { + node.set_attr(iter_a->first, iter_a->second); + } + cluster.add_child(node); + } + } + } else if (kid.tag() == "rm") { + for (list::const_iterator + iter_s = kid.children().begin() ; + iter_s != kid.children().end() ; + iter_s++) + { + const XMLObject& service_conf = *iter_s; + + if (service_conf.tag() == "service" || + service_conf.tag() == "vm") + { + XMLObject service("service"); + for (map::const_iterator + iter_a = service_conf.attrs().begin() ; + iter_a != service_conf.attrs().end() ; + iter_a++) + { + service.set_attr(iter_a->first, iter_a->second); + } + + if (service_conf.tag() == "vm") + service.set_attr("vm", "true"); + else + service.set_attr("vm", "false"); + cluster.add_child(service); + } + } + } else if (kid.tag() == "cman") { + cluster.set_attr("locking", "cman"); + if (kid.has_attr("expected_votes")) + 
cluster.set_attr("minQuorum", kid.get_attr("expected_votes")); + } else if (kid.tag() == "gulm") { + cluster.set_attr("locking", "gulm"); + } + } + + if (_cl_version == "5") + cluster.set_attr("locking", "cman"); + _cman_locking = (cluster.get_attr("locking") == "cman"); + + return cluster; } -counting_auto_ptr +counting_auto_ptr Monitor::merge_data(const String& clustername) { - MutexLocker l(_mutex); - - XMLObject cluster("cluster"); - cluster.set_attr("name", clustername); - cluster.set_attr("config_version", "0"); - unsigned int config_version = 0; - - vector >::iterator> stales; - vector online_nodes; - - for (map >::iterator iter = _cache.begin(); - iter != _cache.end(); - iter++) { - if (iter->second.first < time_sec() - 8) - stales.push_back(iter); - else { - online_nodes.push_back(iter->first); - const XMLObject& cl2 = iter->second.second; - if (cl2.has_attr("name") && - cl2.get_attr("name") == cluster.get_attr("name")) { - unsigned int v; - if (sscanf(cl2.get_attr("config_version").c_str(), "%u", &v) != 1) - continue; - if (v == config_version) - cluster = merge_xmls(cluster, cl2); - else if (v > config_version) { - config_version = v; - cluster = cl2; - } - } - } - } - for (unsigned int i=0; i(); - - // build cluster - counting_auto_ptr cluster_ret; - String name = cluster.get_attr("name"); - String alias = cluster.get_attr("alias"); - String clu_version = cluster.get_attr("cluster_version"); - unsigned int minQuorum = 0; - if (sscanf(cluster.get_attr("minQuorum").c_str(), "%u", &minQuorum) != 1) - cluster_ret = counting_auto_ptr (new Cluster(name, alias, clu_version)); - else - cluster_ret = counting_auto_ptr (new Cluster(name, alias, clu_version, minQuorum)); - // nodes - for (list::const_iterator iter = cluster.children().begin(); - iter != cluster.children().end(); - iter++) { - const XMLObject& obj = *iter; - if (obj.tag() == "node") { - String node_name = obj.get_attr("name"); - if (node_name.empty()) - throw String("merge_data(): node missing 
'name' attr"); - unsigned int votes; - if (sscanf(obj.get_attr("votes").c_str(), "%u", &votes) != 1) - votes = 1; - bool online; - if (obj.has_attr("online")) - online = obj.get_attr("online") == "true"; - else - online = find(online_nodes.begin(), online_nodes.end(), node_name) != online_nodes.end(); - bool clustered = obj.get_attr("clustered") == "true"; - String uptime = obj.get_attr("uptime"); - // add node to cluster - cluster_ret->addNode(node_name, votes, online, clustered, uptime); - } - } - // services - for (list::const_iterator iter = cluster.children().begin(); - iter != cluster.children().end(); - iter++) { - const XMLObject& obj = *iter; - if (obj.tag() == "service") { - // name - String service_name = obj.get_attr("name"); - if (service_name.empty()) - throw String("merge_data(): service missing 'name' attr"); - bool running = obj.get_attr("running") == "true"; - String nodename = obj.get_attr("nodename"); - if (running && nodename.empty()) - throw String("merge_data(): running service missing 'nodename' attr"); - bool failed = obj.get_attr("failed") == "true"; - bool autostart = obj.get_attr("autostart") != "0"; - String time_since_transition = obj.get_attr("time_since_transition"); - // add service to cluster - cluster_ret->addService(service_name, nodename, failed, autostart, time_since_transition); - } - } - - return cluster_ret; + MutexLocker l(_mutex); + + XMLObject cluster("cluster"); + cluster.set_attr("name", clustername); + cluster.set_attr("config_version", "0"); + unsigned int config_version = 0; + + vector >::iterator> stales; + vector online_nodes; + + for (map >::iterator + iter = _cache.begin() ; + iter != _cache.end() ; + iter++) + { + if (iter->second.first < time_sec() - 8) + stales.push_back(iter); + else { + online_nodes.push_back(iter->first); + const XMLObject& cl2 = iter->second.second; + if (cl2.has_attr("name") && + cl2.get_attr("name") == cluster.get_attr("name")) + { + unsigned int v; + int ret; + + ret = 
sscanf(cl2.get_attr("config_version").c_str(), "%u", &v); + if (ret != 1) + continue; + + if (v == config_version) + cluster = merge_xmls(cluster, cl2); + else if (v > config_version) { + config_version = v; + cluster = cl2; + } + } + } + } + + for (unsigned int i = 0 ; i < stales.size() ; i++) + _cache.erase(stales[i]); + + if (_cache.size() == 0) + return counting_auto_ptr(); + + // build cluster + counting_auto_ptr cluster_ret; + String name = cluster.get_attr("name"); + String alias = cluster.get_attr("alias"); + String clu_version = cluster.get_attr("cluster_version"); + unsigned int minQuorum = 0; + + if (sscanf(cluster.get_attr("minQuorum").c_str(), "%u", &minQuorum) != 1) { + cluster_ret = + counting_auto_ptr (new Cluster(name, alias, clu_version)); + } else { + cluster_ret = counting_auto_ptr (new Cluster(name, alias, clu_version, minQuorum)); + } + + // nodes + for (list::const_iterator + iter = cluster.children().begin() ; + iter != cluster.children().end() ; + iter++) + { + const XMLObject& obj = *iter; + if (obj.tag() == "node") { + String node_name = obj.get_attr("name"); + if (node_name.empty()) + throw String("merge_data(): node missing 'name' attr"); + unsigned int votes; + if (sscanf(obj.get_attr("votes").c_str(), "%u", &votes) != 1) + votes = 1; + bool online; + if (obj.has_attr("online")) + online = obj.get_attr("online") == "true"; + else + online = find(online_nodes.begin(), online_nodes.end(), node_name) != online_nodes.end(); + + bool clustered = obj.get_attr("clustered") == "true"; + String uptime = obj.get_attr("uptime"); + + // add node to cluster + cluster_ret->addNode(node_name, votes, online, clustered, uptime); + } + } + + // services + for (list::const_iterator + iter = cluster.children().begin() ; + iter != cluster.children().end() ; + iter++) + { + const XMLObject& obj = *iter; + if (obj.tag() == "service") { + // name + String service_name = obj.get_attr("name"); + if (service_name.empty()) + throw String("merge_data(): service 
missing 'name' attr"); + + bool running = obj.get_attr("running") == "true"; + String nodename = obj.get_attr("nodename"); + if (running && nodename.empty()) + throw String("merge_data(): running service missing 'nodename' attr"); + bool failed = obj.get_attr("failed") == "true"; + bool autostart = obj.get_attr("autostart") != "0"; + String time_since_trans = obj.get_attr("time_since_transition"); + + // add service to cluster + cluster_ret->addService(service_name, nodename, failed, + autostart, time_since_trans); + } + } + + return cluster_ret; } /* -bool +bool Monitor::clustered() { - String out, err; - int status; - vector args; - args.push_back("members"); - if (execute("/sbin/magma_tool", args, out, err, status, EXECUTE_TIMEOUT)) - throw String("clustered(): missing magma_tool"); - if (status) - return false; - - // look for 'Connect failure' substring - if (out.find("Connect failure") != out.npos) - return false; - else - return true; + String out, err; + int status; + vector args; + + args.push_back("members"); + if (execute("/sbin/magma_tool", args, out, err, status, EXECUTE_TIMEOUT)) + throw String("clustered(): missing magma_tool"); + if (status) + return false; + + // look for 'Connect failure' substring + if (out.find("Connect failure") != out.npos) + return false; + else + return true; } */ /* -bool +bool Monitor::quorate() { - String out, err; - int status; - vector args; - args.push_back("quorum"); - if (execute("/sbin/magma_tool", args, out, err, status, EXECUTE_TIMEOUT)) - throw String("quorate(): missing magma_tool"); - if (status) - return false; - - // look for 'Quorate' substring - if (out.find("Quorate") != out.npos) - return true; - else - return false; + String out, err; + int status; + vector args; + + args.push_back("quorum"); + if (execute("/sbin/magma_tool", args, out, err, status, EXECUTE_TIMEOUT)) + throw String("quorate(): missing magma_tool"); + if (status) + return false; + + // look for 'Quorate' substring + if 
(out.find("Quorate") != out.npos) + return true; + else + return false; } */ -vector +/* +** FIXME: rewrite this to use libcman. +*/ +vector Monitor::clustered_nodes() { - vector running; - - if (_cman_locking) { - String out, err; - int status; - vector args; - args.push_back("nodes"); - if (execute(_cman_tool_path, args, out, err, status, EXECUTE_TIMEOUT)) - throw String("clustered_nodes(): missing cman_tool"); - if (status) - return vector(); - vector::size_type Sts_idx = 0; - vector lines = utils::split(out, "\n"); - for (vector::iterator iter = lines.begin(); - iter != lines.end(); - iter++) { - vector words = utils::split(utils::strip(*iter)); - if (words.size() < Sts_idx+1) - continue; - if (words[0] == "Node") { - // update Sts_idx - for (vector::size_type i=0; i args; - args.push_back("members"); - if (execute("/sbin/magma_tool", args, out, err, status, EXECUTE_TIMEOUT)) - throw String("clustered_nodes(): missing magma_tool"); - if (status) - return vector(); - vector lines = utils::split(out, "\n"); - for (vector::iterator iter = lines.begin(); - iter != lines.end(); - iter++) { - String line = utils::strip(*iter); - if (line.find("Member ID") != line.npos) { - String t = line.substr(line.find(": ") + 2); - String::size_type idx = t.find(','); - String name = t.substr(0, idx); - String rest = t.substr(idx); - if (rest.find("UP") != rest.npos) - running.push_back(name); - } - } - } else - throw String("cluster version ") + _cl_version + " not supported"; - - return running; + vector running; + + if (_cman_locking) { + String out, err; + int status; + vector args; + args.push_back("nodes"); + + if (execute(_cman_tool_path, args, out, err, status, EXECUTE_TIMEOUT)) + throw String("clustered_nodes(): missing cman_tool"); + if (status) + return vector(); + + vector::size_type Sts_idx = 0; + vector lines = utils::split(out, "\n"); + for (vector::iterator + iter = lines.begin() ; + iter != lines.end() ; + iter++) + { + vector words = 
utils::split(utils::strip(*iter)); + if (words.size() < Sts_idx+1) + continue; + if (words[0] == "Node") { + // update Sts_idx + for (vector::size_type i = 0 ; i < words.size() ; i++) { + if (words[i] == "Sts") + Sts_idx = i; + } + } else if (words[Sts_idx] == "M") + running.push_back(words.back()); + } + } else if (_cl_version == "4") { + String out, err; + int status; + vector args; + + args.push_back("members"); + if (execute("/sbin/magma_tool", args, out, err, status,EXECUTE_TIMEOUT)) + throw String("clustered_nodes(): missing magma_tool"); + if (status) + return vector(); + + vector lines = utils::split(out, "\n"); + for (vector::iterator + iter = lines.begin() ; + iter != lines.end() ; + iter++) + { + String line = utils::strip(*iter); + if (line.find("Member ID") != line.npos) { + String t = line.substr(line.find(": ") + 2); + String::size_type idx = t.find(','); + String name = t.substr(0, idx); + String rest = t.substr(idx); + + if (rest.find("UP") != rest.npos) + running.push_back(name); + } + } + } else + throw String("cluster version ") + _cl_version + " not supported"; + + return running; } String Monitor::nodename(const vector& nodenames) { - if (_cman_locking) { - String out, err; - int status; - vector args(1, "status"); - if (execute(_cman_tool_path, args, out, err, status, EXECUTE_TIMEOUT)) - throw String("nodename(): missing ") + _cman_tool_path; - if (status) - // cman not running, match using address - out.clear(); - - vector lines = utils::split(utils::strip(out), "\n"); - for (vector::const_iterator iter = lines.begin(); - iter != lines.end(); - iter++) { - vector words = utils::split(utils::strip(*iter)); - if (words.size() != 3) - continue; - if (words[0] + " " + words[1] == "Node name:") - return words[2]; - } - } - - String out, err; - int status; - if (execute("/sbin/ifconfig", vector(), out, err, status, EXECUTE_TIMEOUT)) - throw String("nodename(): missing ifconfig"); - if (status) - throw String("nodename(): ifconfig failed???"); - - 
for (vector::const_iterator iter = nodenames.begin(); - iter != nodenames.end(); - iter++) { - const String& nodename = *iter; - vector ips = Network::name2IP(nodename); - for (vector::iterator iter_ip = ips.begin(); - iter_ip != ips.end(); - iter_ip++) { - if (out.find(*iter_ip) != out.npos) - return nodename; - } - } - - return ""; + if (_cman_locking) { + String out, err; + int status; + + vector args(1, "status"); + if (execute(_cman_tool_path, args, out, err, status, EXECUTE_TIMEOUT)) + throw String("nodename(): missing ") + _cman_tool_path; + + if (status) { + // cman not running, match using address + out.clear(); + } + + vector lines = utils::split(utils::strip(out), "\n"); + for (vector::const_iterator + iter = lines.begin() ; + iter != lines.end() ; + iter++) + { + vector words = utils::split(utils::strip(*iter)); + if (words.size() != 3) + continue; + + if (words[0] + " " + words[1] == "Node name:") + return words[2]; + } + } + + /* FIXME: REWRITE THIS. */ + String out, err; + int status; + if (execute("/sbin/ifconfig", vector(), out, err, + status, EXECUTE_TIMEOUT)) + { + throw String("nodename(): missing ifconfig"); + } + if (status) + throw String("nodename(): ifconfig failed"); + + for (vector::const_iterator + iter = nodenames.begin(); + iter != nodenames.end(); + iter++) + { + const String& nodename = *iter; + vector ips = Network::name2IP(nodename); + for (vector::iterator + iter_ip = ips.begin(); + iter_ip != ips.end(); + iter_ip++) + { + if (out.find(*iter_ip) != out.npos) + return nodename; + } + } + + return ""; } -vector +vector Monitor::services_info() { - vector services; - - try { - String out, err; - int status; - vector args; - - args.clear(); - args.push_back("-f"); - args.push_back("-x"); - if (execute("/usr/sbin/clustat", args, out, err, status, EXECUTE_TIMEOUT)) - throw String("services_info(): missing clustat"); - if (status) - throw String("services_info(): `clustat -x` failed"); - - XMLObject clustat = parseXML(out); - for 
(list::const_iterator iter_c = clustat.children().begin(); - iter_c != clustat.children().end(); - iter_c++) { - if (iter_c->tag() == "groups") { - const XMLObject& groups = *iter_c; - for (list::const_iterator iter = groups.children().begin(); - iter != groups.children().end(); - iter++) { - const XMLObject& group = *iter; - XMLObject service("service"); - service.set_attr("vm", "false"); - - // name - String name(group.get_attr("name")); - String::size_type idx = name.find(":"); - if (idx != name.npos) { - if (name.substr(0, idx) == "vm") - service.set_attr("vm", "true"); - name = name.substr(idx + 1); - } - service.set_attr("name", name); - - - // state - bool failed, running; - int state_code; - if (sscanf(group.get_attr("state").c_str(), "%i", &state_code) != 1) - continue; - switch (state_code) { - case RG_STATE_STOPPED: - case RG_STATE_STOPPING: - case RG_STATE_UNINITIALIZED: - case RG_STATE_ERROR: - case RG_STATE_RECOVER: - case RG_STATE_DISABLED: - running = failed = false; - break; - case RG_STATE_FAILED: - running = false; - failed = true; - break; - case RG_STATE_MIGRATE: - case RG_STATE_STARTING: - case RG_STATE_STARTED: - case RG_STATE_CHECK: - running = true; - failed = false; - break; - default: - continue; - } - service.set_attr("failed", (failed) ? "true" : "false"); - service.set_attr("running", (running) ? "true" : "false"); - if (running) - service.set_attr("nodename", group.get_attr("owner")); - - // last_transition - time_t since = (time_t) utils::to_long(group.get_attr("last_transition")); - if (since != 0) { - time_t current = (time_t) time_sec(); - service.set_attr("time_since_transition", utils::to_string(current - since)); - } - - services.push_back(service); - } - } - } - } catch ( ... 
) {} - - return services; + vector services; + + try { + String out, err; + int status; + vector args; + + args.clear(); + args.push_back("-f"); + args.push_back("-x"); + if (execute("/usr/sbin/clustat", args, out, err, + status, EXECUTE_TIMEOUT)) + { + throw String("services_info(): missing clustat"); + } + + if (status) + throw String("services_info(): `clustat -x` failed: " + err); + + XMLObject clustat = parseXML(out); + for (list::const_iterator + iter_c = clustat.children().begin() ; + iter_c != clustat.children().end() ; + iter_c++) + { + if (iter_c->tag() == "groups") { + const XMLObject& groups = *iter_c; + for (list::const_iterator + iter = groups.children().begin() ; + iter != groups.children().end() ; + iter++) + { + const XMLObject& group = *iter; + XMLObject service("service"); + service.set_attr("vm", "false"); + + // name + String name(group.get_attr("name")); + String::size_type idx = name.find(":"); + if (idx != name.npos) { + if (name.substr(0, idx) == "vm") + service.set_attr("vm", "true"); + name = name.substr(idx + 1); + } + service.set_attr("name", name); + + // state + bool failed, running; + int state_code; + if (sscanf(group.get_attr("state").c_str(), "%i", + &state_code) != 1) + { + continue; + } + switch (state_code) { + case RG_STATE_STOPPED: + case RG_STATE_STOPPING: + case RG_STATE_UNINITIALIZED: + case RG_STATE_ERROR: + case RG_STATE_RECOVER: + case RG_STATE_DISABLED: + running = failed = false; + break; + + case RG_STATE_FAILED: + running = false; + failed = true; + break; + + case RG_STATE_MIGRATE: + case RG_STATE_STARTING: + case RG_STATE_STARTED: + case RG_STATE_CHECK: + running = true; + failed = false; + break; + + default: + continue; + } + service.set_attr("failed", (failed) ? "true" : "false"); + service.set_attr("running", (running) ? 
"true" : "false"); + if (running) + service.set_attr("nodename", group.get_attr("owner")); + + // last_transition + time_t since = (time_t) + utils::to_long(group.get_attr("last_transition")); + if (since != 0) { + time_t current = (time_t) time_sec(); + service.set_attr("time_since_transition", + utils::to_string(current - since)); + } + + services.push_back(service); + } + } + } + } catch ( ... ) {} + + return services; } String Monitor::uptime() const { - struct sysinfo s_info; - if (sysinfo(&s_info)) - return ""; - return String(utils::to_string(s_info.uptime)); + struct sysinfo s_info; + if (sysinfo(&s_info)) + return ""; + return String(utils::to_string(s_info.uptime)); } -String +String Monitor::probe_quorum() const { - if (_cman_locking) { - int status; - String out, err; - vector args; - args.push_back("status"); - if (execute(_cman_tool_path, args, out, err, status, EXECUTE_TIMEOUT)) - throw _cman_tool_path + " status failed"; - if (status) - throw _cman_tool_path + " status failed"; - - vector lines = utils::split(out, "\n"); - for (vector::const_iterator iter = lines.begin(); - iter != lines.end(); - iter++) { - vector words = utils::split(*iter); - if (words.size() < 2) - continue; - if (words[0] == "Quorum:") - return words[1]; - } - } else { - // TODO: implement quorum detection on GULM clusters - throw String("GULM quorum detection not yet implemented"); - } - throw String("quorum not found"); + if (_cman_locking) { + int status; + String out, err; + vector args; + + args.push_back("status"); + if (execute(_cman_tool_path, args, out, err, status, EXECUTE_TIMEOUT)) + throw _cman_tool_path + " status failed"; + if (status) + throw _cman_tool_path + " status failed"; + + vector lines = utils::split(out, "\n"); + for (vector::const_iterator + iter = lines.begin() ; + iter != lines.end() ; + iter++) + { + vector words = utils::split(*iter); + if (words.size() < 2) + continue; + if (words[0] == "Quorum:") + return words[1]; + } + } else { + // TODO: 
implement quorum detection on GULM clusters + throw String("GULM quorum detection not yet implemented"); + } + throw String("quorum not found"); } - String cluster_version() { - int status; - String out, err; - vector args; - args.push_back("-V"); - if (execute(CCS_TOOL_PATH, args, out, err, status, EXECUTE_TIMEOUT)) - throw String("missing ") + CCS_TOOL_PATH; - if (status) - throw String(CCS_TOOL_PATH) + " -V failed"; - - vector words = utils::split(utils::strip(out)); - if (words.size() < 2) - throw String(CCS_TOOL_PATH) + " -V failed"; - if (utils::strip(words[0]) != CCS_TOOL_PATH) - throw String(CCS_TOOL_PATH) + " -V failed"; - String version = utils::strip(words[1]); - if (version.size() < 5) - throw String(CCS_TOOL_PATH) + ": unrecognizable version format"; - if (version[0] == '1') - return "4"; - else if (version[0] == '2') - return "5"; - else - throw String(CCS_TOOL_PATH) + ": unsupported version"; -} + int status; + String out, err; + vector args; + + args.push_back("-V"); + if (execute(CCS_TOOL_PATH, args, out, err, status, EXECUTE_TIMEOUT)) + throw String("missing ") + CCS_TOOL_PATH; + + if (status) + throw String(CCS_TOOL_PATH) + " -V failed: " + out + " / " + err; + + vector words = utils::split(utils::strip(out)); + if (words.size() < 2) + throw String(CCS_TOOL_PATH) + " -V failed: " + out + " / " + err; + + if (utils::strip(words[0]) != CCS_TOOL_PATH) + throw String(CCS_TOOL_PATH) + " -V failed: " + out + " / " + err; + + String version = utils::strip(words[1]); + if (version.size() < 5) { + throw String(CCS_TOOL_PATH) + + ": unrecognizable version format: " + version; + } + if (version[0] == '1') + return "4"; + else if (version[0] == '2') + return "5"; + else + throw String(CCS_TOOL_PATH) + ": unsupported version: " + version[0]; +} XMLObject merge_xmls(const XMLObject& what, const XMLObject& with) { - if (what.tag() != with.tag()) - throw String("merge_xmls(): tag mismatch"); - - XMLObject new_xml(what.tag()); - for (map::const_iterator iter = 
what.attrs().begin(); - iter != what.attrs().end(); - iter++) - new_xml.set_attr(iter->first, iter->second); - for (map::const_iterator iter = with.attrs().begin(); - iter != with.attrs().end(); - iter++) - new_xml.set_attr(iter->first, iter->second); - - list kids_left = with.children(); - - for (list::const_iterator iter_o = what.children().begin(); - iter_o != what.children().end(); - iter_o++) { - XMLObject new_kid(*iter_o); - for (list::const_iterator iter = with.children().begin(); - iter != with.children().end(); - iter++) { - const XMLObject& kid = *iter; - if (kid.tag() == new_kid.tag() && - kid.has_attr("name") && - new_kid.has_attr("name") && - kid.get_attr("name") == new_kid.get_attr("name")) { - // same tag and name -->> merge - new_kid = merge_xmls(new_kid, kid); - kids_left.remove(kid); - } - } - new_xml.add_child(new_kid); - } - - for (list::const_iterator iter = kids_left.begin(); - iter != kids_left.end(); - iter++) - new_xml.add_child(*iter); - - return new_xml; -} + if (what.tag() != with.tag()) + throw String("merge_xmls(): tag mismatch"); + + XMLObject new_xml(what.tag()); + for (map::const_iterator + iter = what.attrs().begin() ; + iter != what.attrs().end() ; + iter++) + { + new_xml.set_attr(iter->first, iter->second); + for (map::const_iterator + iter = with.attrs().begin() ; + iter != with.attrs().end() ; + iter++) + { + new_xml.set_attr(iter->first, iter->second); + } + } + list kids_left = with.children(); + for (list::const_iterator + iter_o = what.children().begin() ; + iter_o != what.children().end() ; + iter_o++) + { + XMLObject new_kid(*iter_o); + for (list::const_iterator + iter = with.children().begin() ; + iter != with.children().end() ; + iter++) + { + const XMLObject& kid = *iter; + if (kid.tag() == new_kid.tag() && + kid.has_attr("name") && + new_kid.has_attr("name") && + kid.get_attr("name") == new_kid.get_attr("name")) + { + // same tag and name -->> merge + new_kid = merge_xmls(new_kid, kid); + kids_left.remove(kid); + } + 
} + new_xml.add_child(new_kid); + } + + for (list::const_iterator + iter = kids_left.begin() ; + iter != kids_left.end() ; + iter++) + { + new_xml.add_child(*iter); + } + + return new_xml; +} String get_cman_tool_path() { - String path = "/sbin/cman_tool"; - if (access(path.c_str(), X_OK)) - path = "/usr/sbin/cman_tool"; - return path; + String path = "/sbin/cman_tool"; + if (access(path.c_str(), X_OK)) + path = "/usr/sbin/cman_tool"; + return path; } --- conga/ricci/modules/cluster/clumon/src/daemon/Monitor.h 2007/03/23 17:25:13 1.6 +++ conga/ricci/modules/cluster/clumon/src/daemon/Monitor.h 2007/09/04 18:28:40 1.7 @@ -1,5 +1,5 @@ /* - Copyright Red Hat, Inc. 2005 + Copyright Red Hat, Inc. 2005-2007 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -13,7 +13,7 @@ You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, + Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ /* @@ -21,8 +21,8 @@ */ -#ifndef Monitor_h -#define Monitor_h +#ifndef __CONGA_CLUSTER_MONITOR_H +#define __CONGA_CLUSTER_MONITOR_H #include "String.h" #include @@ -39,60 +39,51 @@ class Monitor : public Thread, public CommDP { - public: - Monitor(unsigned short port); - virtual ~Monitor(); - - String request(const String&); - - virtual void msg_arrived(const String& host, - const String& msg); - - protected: - virtual void run(); - private: - - Mutex _mutex; // _cluster and _cache - counting_auto_ptr _cluster; - std::map > _cache; - - Communicator _comm; - - /* - cluster versions: - RHEL3 = "3" - RHEL4, FC4, FC5 = "4" - RHEL5, FC6 = "5" - */ - const String _cl_version; - - const String _cman_tool_path; - - bool _cman_locking; - - void update_now(); - - // return (nodenames - my_nodename) - std::vector get_local_info(String& nodename, - String& clustername, - String& msg); - counting_auto_ptr merge_data(const String& clustername); - - XMLObject parse_cluster_conf(); - // bool clustered(); - // bool quorate(); - String nodename(const std::vector& nodenames); - std::vector clustered_nodes(); - std::vector services_info(); - - String uptime() const; - - String probe_quorum() const; - -}; // class Monitor + public: + Monitor(unsigned short port); + virtual ~Monitor(); + + String request(const String&); + + virtual void msg_arrived(const String& host, const String& msg); + + protected: + virtual void run(); + + private: + Mutex _mutex; // _cluster and _cache + counting_auto_ptr _cluster; + std::map > _cache; + Communicator _comm; + + /* + ** cluster versions: + ** RHEL3 = "3" + ** RHEL4, FC4, FC5 = "4" + ** RHEL5, FC6 = "5" + */ + const String _cl_version; + const String _cman_tool_path; + bool _cman_locking; + void update_now(); + + // return (nodenames - my_nodename) + std::vector get_local_info( String& nodename, + String& clustername, + String& msg); + counting_auto_ptr merge_data(const String& clustername); + XMLObject parse_cluster_conf(); + //bool clustered(); 
+ //bool quorate(); + String nodename(const std::vector& nodenames); + std::vector clustered_nodes(); + std::vector services_info(); + + String uptime() const; + String probe_quorum() const; +}; -}; // namespace ClusterMonitoring +}; - -#endif // Monitor_h +#endif --- conga/ricci/modules/cluster/clumon/src/daemon/Peer.cpp 2006/10/14 18:00:02 1.3 +++ conga/ricci/modules/cluster/clumon/src/daemon/Peer.cpp 2007/09/04 18:28:40 1.4 @@ -1,5 +1,5 @@ /* - Copyright Red Hat, Inc. 2005 + Copyright Red Hat, Inc. 2005-2007 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -13,7 +13,7 @@ You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, + Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* @@ -30,85 +30,86 @@ Peer::Peer() : - _sock(new ClientSocket()), - _in(new String()), - _out(new String()) + _sock(new ClientSocket()), + _in(new String()), + _out(new String()) {} Peer::Peer(const String& hostname, const ClientSocket& sock) : - _sock(new ClientSocket(sock)), - _hostname(hostname), - _in(new String()), - _out(new String()) + _sock(new ClientSocket(sock)), + _hostname(hostname), + _in(new String()), + _out(new String()) { - _sock->nonblocking(true); + _sock->nonblocking(true); } Peer::Peer(const String& hostname, unsigned short port) : - _sock(new ClientSocket(hostname, port)), - _hostname(hostname), - _in(new String()), - _out(new String()) + _sock(new ClientSocket(hostname, port)), + _hostname(hostname), + _in(new String()), + _out(new String()) { - _sock->nonblocking(true); + _sock->nonblocking(true); } Peer::~Peer() {} - -bool +bool Peer::operator== (const Peer& p) const { - return (_in == p._in && - _out == p._out && - _sock == p._sock && - _hostname == p._hostname); + return (_in == p._in && + _out == p._out && + 
_sock == p._sock && + _hostname == p._hostname); } -void +void Peer::send() { - if (_out->empty()) - return; - log("sending data to " + _hostname, LogTransfer); - String rest = _sock->send(*_out); - *_out = rest; + if (_out->empty()) + return; + + log("sending data to " + _hostname, LogTransfer); + String rest = _sock->send(*_out); + *_out = rest; } vector Peer::receive() { - log("receiving data from " + _hostname, LogTransfer); - - String& in = *_in; - in += _sock->recv(); - - vector ret; - while (true) { - String::size_type idx = in.find("\n\n"); - if (idx == in.npos) - return ret; - idx += 2; - ret.push_back(in.substr(0, idx)); - in = in.substr(idx); - } + log("receiving data from " + _hostname, LogTransfer); + + String& in = *_in; + in += _sock->recv(); + + vector ret; + + while (true) { + String::size_type idx = in.find("\n\n"); + if (idx == in.npos) + return ret; + idx += 2; + ret.push_back(in.substr(0, idx)); + in = in.substr(idx); + } } void Peer::append(const String& msg) { - _out->append(msg); + _out->append(msg); } -int +int Peer::get_sock_fd() { - return _sock->get_sock(); + return _sock->get_sock(); } String Peer::hostname() { - return _hostname; + return _hostname; } --- conga/ricci/modules/cluster/clumon/src/daemon/Peer.h 2006/08/10 22:53:08 1.2 +++ conga/ricci/modules/cluster/clumon/src/daemon/Peer.h 2007/09/04 18:28:40 1.3 @@ -1,5 +1,5 @@ /* - Copyright Red Hat, Inc. 2005 + Copyright Red Hat, Inc. 2005-2007 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -13,16 +13,15 @@ You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, + Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ /* * Author: Stanko Kupcevic */ - -#ifndef Peer_h -#define Peer_h +#ifndef __CONGA_CLUSTER_PEER_H +#define __CONGA_CLUSTER_PEER_H #include "counting_auto_ptr.h" #include "Socket.h" @@ -37,36 +36,34 @@ class Peer { - public: - Peer(); - Peer(const String& hostname, const ClientSocket&); - Peer(const String& hostname, unsigned short port); - virtual ~Peer(); - - void send(); - std::vector receive(); - - bool outq_empty() { return _out->empty(); } - - void append(const String& msg); - - int get_sock_fd(); - - String hostname(); - - bool operator== (const Peer&) const; - - private: - counting_auto_ptr _sock; - const String _hostname; - - counting_auto_ptr _in; - counting_auto_ptr _out; - -}; // class Peer + public: + Peer(); + Peer(const String& hostname, const ClientSocket&); + Peer(const String& hostname, unsigned short port); + virtual ~Peer(); + + void send(); + std::vector receive(); + + bool outq_empty() { return _out->empty(); } + + void append(const String& msg); + + int get_sock_fd(); + + String hostname(); + + bool operator== (const Peer&) const; + + private: + counting_auto_ptr _sock; + const String _hostname; + counting_auto_ptr _in; + counting_auto_ptr _out; +}; -}; // namespace ClusterMonitoring +}; -#endif // Peer +#endif --- conga/ricci/modules/cluster/clumon/src/daemon/main.cpp 2007/08/22 18:47:20 1.5 +++ conga/ricci/modules/cluster/clumon/src/daemon/main.cpp 2007/09/04 18:28:40 1.6 @@ -1,5 +1,5 @@ /* - Copyright Red Hat, Inc. 2005 + Copyright Red Hat, Inc. 2005-2007 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -13,7 +13,7 @@ You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, + Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ /* @@ -57,184 +57,199 @@ class ClientInfo { -public: - ClientInfo() {} - ClientInfo(ClientSocket& sock, String str="") : - sock(sock), str(str) {} - - ClientSocket sock; - String str; + public: + ClientInfo() {} + ClientInfo(ClientSocket& sock, String str="") : + sock(sock), str(str) {} + + ClientSocket sock; + String str; }; +int +main(int argc, char **argv) +{ + bool debug = false, foreground = false; + int v_level = -1; + int rv; + + while ((rv = getopt(argc, argv, "fdv:")) != EOF) { + switch (rv) { + case 'd': + debug = true; + break; + + case 'f': + foreground = true; + break; + + case 'v': + if (sscanf(optarg, "%d", &v_level) != 1) { + fprintf(stderr, "Invalid verbosity level: %s\n", optarg); + v_level = -1; + } + break; + + default: + break; + } + } + + if (v_level < 0) { + cout << "Setting verbosity level to LogBasic" << endl; + v_level = LogBasic; + } + if (foreground) + set_logger(counting_auto_ptr(new Logger(1, "clumond", LogLevel(v_level)))); + else + set_logger(counting_auto_ptr(new Logger(LOG_FILE, "clumond", LogLevel(v_level)))); + + log("started"); + try { + ServerSocket server(MONITORING_CLIENT_SOCKET); + server.nonblocking(true); + Monitor monitor(COMMUNICATION_PORT); + + if (!foreground && (geteuid() == 0)) + daemon_init(argv[0]); + + setup_signal(SIGINT, shutdown); + setup_signal(SIGTERM, shutdown); + setup_signal(SIGCHLD, sigchild); + setup_signal(SIGPIPE, SIG_IGN); + + if (debug) + setup_signal(SIGSEGV, segfault); + else + unblock_signal(SIGSEGV); + + serve_clients(monitor, server); + } catch (String e) { + log("unhandled exception in main(): " + e); + log("died", LogAll); + return 1; + } catch ( ... 
) { + log("unhandled unknown exception in main()"); + log("died", LogAll); + return 1; + } -int -main(int argc, char** argv) -{ - bool debug=false, foreground=false; - int v_level = -1; - - int rv; - while ((rv = getopt(argc, argv, "fdv:")) != EOF) - switch (rv) { - case 'd': - debug = true; - break; - case 'f': - foreground = true; - break; - case 'v': - sscanf(optarg, "%d", &v_level); - break; - default: - break; - } - - if (v_level < 0) { - cout << "Setting verbosity level to LogBasic" << endl; - v_level = LogBasic; - } - if (foreground) - set_logger(counting_auto_ptr(new Logger(1, "clumond", LogLevel(v_level)))); - else - set_logger(counting_auto_ptr(new Logger(LOG_FILE, "clumond", LogLevel(v_level)))); - - log("started"); - try { - ServerSocket server(MONITORING_CLIENT_SOCKET); - server.nonblocking(true); - Monitor monitor(COMMUNICATION_PORT); - - if (!foreground && (geteuid() == 0)) - daemon_init(argv[0]); - setup_signal(SIGINT, shutdown); - setup_signal(SIGTERM, shutdown); - setup_signal(SIGCHLD, sigchild); - setup_signal(SIGPIPE, SIG_IGN); - if (debug) - setup_signal(SIGSEGV, segfault); - else - unblock_signal(SIGSEGV); - - serve_clients(monitor, server); - } catch (String e) { - log("unhandled exception in main(): " + e); - log("died", LogAll); - return 1; - } catch ( ... 
) { - log("unhandled unknown exception in main()"); - log("died", LogAll); - return 1; - } - - unlink("/var/run/clumond.pid"); - log("exited", LogAll); - return 0; + unlink("/var/run/clumond.pid"); + log("exited", LogAll); + return 0; } -void +void serve_clients(Monitor& monitor, ServerSocket& server) { - map clients; - - log("Starting monitor", LogMonitor); - monitor.start(); - - while (!shutdown_pending) { - unsigned int socks_num = clients.size() + 1; - - // prepare poll structs - array_auto_ptr poll_data(new poll_fd[socks_num]); - poll_data[0].fd = server.get_sock(); - poll_data[0].events = POLLIN; - poll_data[0].revents = 0; - map::iterator iter = clients.begin(); - for (unsigned int i=1; ifirst; - poll_data[i].events = POLLIN; - if ( ! iter->second.str.empty()) - poll_data[i].events |= POLLOUT; - poll_data[i].revents = 0; - iter++; - } - - // wait for events - int ret = poll(poll_data.get(), socks_num, 500); - if (ret == 0) - continue; - else if (ret == -1) { - if (errno == EINTR) - continue; - else - throw String("serve_clients(): poll() error: " + String(strerror(errno))); - } - - // process events - for (unsigned int i=0; i clients; + + log("Starting monitor", LogMonitor); + monitor.start(); + + while (!shutdown_pending) { + unsigned int socks_num = clients.size() + 1; + + // prepare poll structs + array_auto_ptr poll_data(new poll_fd[socks_num]); + poll_data[0].fd = server.get_sock(); + poll_data[0].events = POLLIN; + poll_data[0].revents = 0; + + map::iterator iter = clients.begin(); + for (unsigned int i = 1 ; i < socks_num ; i++) { + poll_data[i].fd = iter->first; + poll_data[i].events = POLLIN; + if (!iter->second.str.empty()) + poll_data[i].events |= POLLOUT; + poll_data[i].revents = 0; + iter++; + } + + // wait for events + int ret = poll(poll_data.get(), socks_num, 500); + if (ret == 0) + continue; + else if (ret == -1) { + if (errno == EINTR) + continue; + else { + throw String("serve_clients(): poll() error: " + + String(strerror(errno))); + } + 
} + + // process events + for (unsigned int i = 0 ; i < socks_num && !shutdown_pending ; i++) { + poll_fd& poll_info = poll_data[i]; + + // server socket + if (poll_info.fd == server.get_sock()) { + if (poll_info.revents & POLLIN) { + try { + ClientSocket sock = server.accept(); + sock.nonblocking(true); + clients[sock.get_sock()] = ClientInfo(sock); + } catch ( ... ) {} + } + if (poll_info.revents & (POLLERR | POLLHUP | POLLNVAL)) { + throw String("serve_clients(): poll: " + + String(strerror(errno))); + } + } else { + // client socket + if (poll_info.revents & POLLIN) { + ClientInfo& info = clients[poll_info.fd]; + try { + String msg = info.sock.recv(); + if (msg.size()) + info.str = monitor.request(msg); + } catch ( ... ) { + clients.erase(poll_info.fd); + } + continue; + } + + if (poll_info.revents & (POLLERR | POLLHUP | POLLNVAL)) { + clients.erase(poll_info.fd); + continue; + } + + if (poll_info.revents & POLLOUT) { + ClientInfo& info = clients[poll_info.fd]; + try { + info.str = info.sock.send(info.str); + } catch ( ... 
) { + clients.erase(poll_info.fd); + continue; + } + } + } + } } - } // client socket - } // process events - } // while } -void +void shutdown(int) { - log_sigsafe("exit requested", LogExit); - shutdown_pending = true; + log_sigsafe("exit requested", LogExit); + shutdown_pending = true; } void segfault(int) { - char msg[128]; - snprintf(msg, sizeof(msg), "PID %d Thread %d: SIGSEGV, waiting forensics", - getpid(), (int) pthread_self()); + char msg[128]; - log_sigsafe(msg, LogAll); - select(0, NULL, NULL, NULL, NULL); + snprintf(msg, sizeof(msg), "PID %d Thread %d: SIGSEGV, waiting forensics", + getpid(), (int) pthread_self()); + log_sigsafe(msg, LogAll); + select(0, NULL, NULL, NULL, NULL); } -void +void sigchild(int) { - // do nothing + // do nothing } --- conga/ricci/modules/cluster/clumon/src/include/Cluster.h 2006/08/15 00:12:33 1.6 +++ conga/ricci/modules/cluster/clumon/src/include/Cluster.h 2007/09/04 18:28:40 1.7 @@ -1,5 +1,5 @@ /* - Copyright Red Hat, Inc. 2005 + Copyright Red Hat, Inc. 2005-2007 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -13,7 +13,7 @@ You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, + Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ /* @@ -21,8 +21,8 @@ */ -#ifndef Cluster_h -#define Cluster_h +#ifndef __CONGA_MODCLUSTER_CLUSTER_H +#define __CONGA_MODCLUSTER_CLUSTER_H #include #include "String.h" @@ -32,10 +32,9 @@ #include "XML.h" -namespace ClusterMonitoring +namespace ClusterMonitoring { - class Node; class Service; class Cluster; @@ -47,119 +46,112 @@ class Cluster { - public: - Cluster(const String& name, - const String& alias, - const String& cluster_version, - unsigned int minQuorum=0); - virtual ~Cluster(); - - String name(); - String alias(); - String version(); - unsigned int votes(); - unsigned int minQuorum(); - bool quorate(); - - counting_auto_ptr addNode(const String& name, - unsigned int votes, - bool online, - bool clustered, - const String& uptime); - - counting_auto_ptr addService(const String& name, - const String& nodeName, - bool failed, - bool autostart, - const String& time_since_transition); - - std::list > nodes(); - std::list > clusteredNodes(); - std::list > unclusteredNodes(); - - std::list > services(); - std::list > runningServices(); - std::list > stoppedServices(); - std::list > failedServices(); - - private: - String _name; - String _alias; - String _cl_version; - unsigned int _minQuorum; - std::map > _nodes; - + public: + Cluster(const String& name, + const String& alias, + const String& cluster_version, + unsigned int minQuorum=0); + virtual ~Cluster(); + + String name(); + String alias(); + String version(); + unsigned int votes(); + unsigned int minQuorum(); + bool quorate(); + + counting_auto_ptr addNode(const String& name, + unsigned int votes, + bool online, + bool clustered, + const String& uptime); + + counting_auto_ptr addService( const String& name, + const String& nodeName, + bool failed, + bool autostart, + const String& time_since_transition); + + std::list > nodes(); + std::list > clusteredNodes(); + std::list > unclusteredNodes(); + + std::list > services(); + std::list > runningServices(); + std::list > stoppedServices(); + std::list > 
failedServices(); + + private: + String _name; + String _alias; + String _cl_version; + unsigned int _minQuorum; + std::map > _nodes; }; - class Node { - public: - Node(const String& name, - const String& clustername, - unsigned int votes, - bool online, - bool clustered, - const String& uptime); - virtual ~Node(); - - String name() const; - String clustername() const; - unsigned int votes() const; - bool online() const; - bool clustered() const; // available to cluster - String uptime() const; - - counting_auto_ptr addService(const String& name, - bool failed, - bool autostart, - const String& time_since_transition); - std::list > services(); - - private: - String _name; - String _clustername; - unsigned int _votes; - bool _online; - bool _clustered; // available to cluster - String _uptime; - - std::map > _services; - -}; + public: + Node( const String& name, + const String& clustername, + unsigned int votes, + bool online, + bool clustered, + const String& uptime); + virtual ~Node(); + + String name() const; + String clustername() const; + unsigned int votes() const; + bool online() const; + bool clustered() const; // available to cluster + String uptime() const; + + counting_auto_ptr addService(const String& name, + bool failed, + bool autostart, + const String& time_since_transition); + std::list > services(); + + private: + String _name; + String _clustername; + unsigned int _votes; + bool _online; + bool _clustered; // available to cluster + String _uptime; + std::map > _services; +}; class Service { - public: - Service(const String& name, - const String& clustername, - const Node& node, - bool failed, - bool autostart, - const String& time_since_transition); - virtual ~Service(); - - String name() const; - String clustername() const; - bool running() const; - String nodename() const; - bool failed() const; - bool autostart() const; - String time_since_transition() const; - - private: - String _name; - String _clustername; - String _nodename; - bool 
_autostart; - bool _failed; - String _time_since_transition; - + public: + Service(const String& name, + const String& clustername, + const Node& node, + bool failed, + bool autostart, + const String& time_since_transition); + virtual ~Service(); + + String name() const; + String clustername() const; + bool running() const; + String nodename() const; + bool failed() const; + bool autostart() const; + String time_since_transition() const; + + private: + String _name; + String _clustername; + String _nodename; + bool _autostart; + bool _failed; + String _time_since_transition; }; - -}; // namespace ClusterMonitoring - +}; #endif --- conga/ricci/modules/cluster/clumon/src/include/ClusterMonitor.h 2006/08/10 22:53:08 1.2 +++ conga/ricci/modules/cluster/clumon/src/include/ClusterMonitor.h 2007/09/04 18:28:40 1.3 @@ -1,5 +1,5 @@ /* - Copyright Red Hat, Inc. 2005 + Copyright Red Hat, Inc. 2005-2007 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -21,8 +21,8 @@ */ -#ifndef ClusterMonitor_h -#define ClusterMonitor_h +#ifndef __CONGA_CLUSTERMONITOR_H +#define __CONGA_CLUSTERMONITOR_H #include "Cluster.h" #include "counting_auto_ptr.h" @@ -37,22 +37,19 @@ class ClusterMonitor { - public: - ClusterMonitor(const String& socket_path=MONITORING_CLIENT_SOCKET); - virtual ~ClusterMonitor(); - - counting_auto_ptr get_cluster(); - - private: - String _sock_path; - - ClusterMonitor(const ClusterMonitor&); - ClusterMonitor& operator= (const ClusterMonitor&); - -}; // class ClusterMonitor + public: + ClusterMonitor(const String& socket_path=MONITORING_CLIENT_SOCKET); + virtual ~ClusterMonitor(); + counting_auto_ptr get_cluster(); -}; // namespace ClusterMonitoring + private: + String _sock_path; + ClusterMonitor(const ClusterMonitor&); + ClusterMonitor& operator= (const ClusterMonitor&); +}; + +}; #endif --- conga/ricci/modules/cluster/clumon/src/include/clumond_globals.h 2006/03/27 23:15:30 
1.1 +++ conga/ricci/modules/cluster/clumon/src/include/clumond_globals.h 2007/09/04 18:28:40 1.2 @@ -1,5 +1,5 @@ /* - Copyright Red Hat, Inc. 2005 + Copyright Red Hat, Inc. 2005-2007 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -13,7 +13,7 @@ You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 675 Mass Ave, Cambridge, + Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* @@ -25,10 +25,8 @@ #define clumond_globals_h -#define COMMUNICATION_PORT 16851 -#define MONITORING_CLIENT_SOCKET "/var/run/clumond.sock" - -#define LOG_FILE "/var/log/clumond.log" - +#define COMMUNICATION_PORT 16851 +#define MONITORING_CLIENT_SOCKET "/var/run/clumond.sock" +#define LOG_FILE "/var/log/clumond.log" #endif