From mboxrd@z Thu Jan 1 00:00:00 1970 From: lhh@sourceware.org Date: 30 Nov 2007 21:36:33 -0000 Subject: [Cluster-devel] cluster/rgmanager ChangeLog TODO include/resgr ... Message-ID: <20071130213633.23403.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Changes by: lhh at sourceware.org 2007-11-30 21:36:29 Modified files: rgmanager : ChangeLog TODO rgmanager/include: resgroup.h reslist.h restart_counter.h rg_locks.h rg_queue.h rgmanager/src/clulib: Makefile members.c rg_strings.c vft.c rgmanager/src/daemons: Makefile fo_domain.c groups.c main.c restree.c rg_event.c rg_forward.c rg_state.c rg_thread.c test.c rgmanager/src/resources: Makefile Added files: rgmanager : event-script.txt rgmanager/include: event.h rgmanager/src/clulib: sets.c rgmanager/src/daemons: event_config.c service_op.c slang_event.c rgmanager/src/resources: default_event_script.sl Removed files: rgmanager/src/daemons: nodeevent.c Log message: Add centralized S/Lang event script engine v0.8.1 Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/event-script.txt.diff?cvsroot=cluster&r1=NONE&r2=1.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&r1=1.61&r2=1.62 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/TODO.diff?cvsroot=cluster&r1=1.8&r2=1.9 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/event.h.diff?cvsroot=cluster&r1=NONE&r2=1.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/resgroup.h.diff?cvsroot=cluster&r1=1.24&r2=1.25 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/reslist.h.diff?cvsroot=cluster&r1=1.24&r2=1.25 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/restart_counter.h.diff?cvsroot=cluster&r1=1.2&r2=1.3 
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/rg_locks.h.diff?cvsroot=cluster&r1=1.3&r2=1.4 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/rg_queue.h.diff?cvsroot=cluster&r1=1.6&r2=1.7 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/sets.c.diff?cvsroot=cluster&r1=NONE&r2=1.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/Makefile.diff?cvsroot=cluster&r1=1.18&r2=1.19 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/members.c.diff?cvsroot=cluster&r1=1.4&r2=1.5 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/rg_strings.c.diff?cvsroot=cluster&r1=1.10&r2=1.11 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/vft.c.diff?cvsroot=cluster&r1=1.22&r2=1.23 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/event_config.c.diff?cvsroot=cluster&r1=NONE&r2=1.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/service_op.c.diff?cvsroot=cluster&r1=NONE&r2=1.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/slang_event.c.diff?cvsroot=cluster&r1=NONE&r2=1.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/Makefile.diff?cvsroot=cluster&r1=1.24&r2=1.25 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/fo_domain.c.diff?cvsroot=cluster&r1=1.14&r2=1.15 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&r1=1.40&r2=1.41 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/main.c.diff?cvsroot=cluster&r1=1.45&r2=1.46 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/restree.c.diff?cvsroot=cluster&r1=1.39&r2=1.40 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_event.c.diff?cvsroot=cluster&r1=1.2&r2=1.3 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_forward.c.diff?cvsroot=cluster&r1=1.11&r2=1.12 
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_state.c.diff?cvsroot=cluster&r1=1.41&r2=1.42 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_thread.c.diff?cvsroot=cluster&r1=1.24&r2=1.25 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/test.c.diff?cvsroot=cluster&r1=1.13&r2=1.14 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/nodeevent.c.diff?cvsroot=cluster&r1=1.9&r2=NONE http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/default_event_script.sl.diff?cvsroot=cluster&r1=NONE&r2=1.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/Makefile.diff?cvsroot=cluster&r1=1.20&r2=1.21 /cvs/cluster/cluster/rgmanager/event-script.txt,v --> standard output revision 1.1 --- cluster/rgmanager/event-script.txt +++ - 2007-11-30 21:36:29.524613000 +0000 @@ -0,0 +1,305 @@ +TODO: +* Return correct error codes to clusvcadm (currently it always returns + "Unknown") +* Write glue for 'migrate' operations and migrate-enabled services + +Basic configuration specification: + + + + + + + + + (note, all service ops and such deal with node ID, not + with node names) + + + + + + service_owner="3"/> + + + service_owner="3"/> + + + + ... + + +General globals available from all scripts: + + node_self - local node ID + event_type - event class, either: + EVENT_NONE - unspecified / unknown + EVENT_NODE - node transition + EVENT_SERVICE - service transition + EVENT_USER - a user-generated request + EVENT_CONFIG - [NOT CONFIGURABLE] + +Node event globals (i.e. when event_type == EVENT_NODE): + + node_id - node ID which is transitioning + node_name - name of node which is transitioning + node_state - new node state (NODE_ONLINE or NODE_OFFLINE, or if you prefer, + 1 or 0, respectively) + node_clean - 0 if the node has not been fenced, 1 if the node has been + fenced + +Service event globals (i.e. 
when event_type == EVENT_SERVICE): + + service_name - Name of service which transitioned + service_state - new state of service + service_owner - new owner of service (or <0 if service is no longer + running) + service_last_owner - Last owner of service if known. Used for when + service_state = "recovering" generally, in order to + apply restart/relocate/disable policy. + +User event globals (i.e. when event_type == EVENT_USER): + + service_name - service to perform request upon + user_request - request to perform (USER_ENABLE, USER_DISABLE, + USER_STOP, USER_RELOCATE, [TODO] USER_MIGRATE) + user_target - target node ID if applicable + + +Scripting functions - Informational: + + node_list = nodes_online(); + + Returns a list of all online nodes. + + service_list = service_list(); + + Returns a list of all configured services. + + (restarts, last_owner, owner, state) = service_status(service_name); + + Returns the state, owner, last_owner, and restarts. Note that + all return values are optional, but are right-justified per S-Lang + specification. This means if you only want the 'state', you can use: + + (state) = service_status(service_name); + + However, if you need the restart count, you must provide all four + return values as above. + + (nofailback, restricted, ordered, node_list) = + service_domain_info(service_name); + + Returns the failover domain specification, if it exists, for the + specified service name. The node list returned is an ordered list + according to priority levels. In the case of unordered domains, + the ordering of the returned list is pseudo-random. + +Scripting functions - Operational: + + err = service_start(service_name, node_list, [avoid_list]); + + Start a non-running, (but runnable, i.e. not failed) + service on the first node in node_list. Failing that, start it on + the second node in node_list and so forth. One may also specify + an avoid list, but it's better to just use the subtract() function + below. 
+ + err = service_stop(service_name, [0 = stop, 1 = disable]); + + Stop a running service. The second parameter is optional, and if + non-zero is specified, the service will enter the disabled state. + + ... stuff that's not done but needs to be: + + err = service_relocate(service_name, node_list); + + Move a running service to the specified node_list in order of + preference. In the case of VMs, this is actually a migrate-or- + relocate operation. + +Utility functions - Node list manipulation + + node_list = union(left_node_list, right_node_list); + + Calculates the union between the two node lists, removing duplicates + and preserving ordering according to left_node_list. Any added + values from right_node_list will appear in their order, but + after left_node_list in the returned list. + + node_list = intersection(left_node_list, right_node_list); + + Calculates the intersection (items in both lists) between the two + node lists, removing duplicates and preserving ordering according + to left_node_list. Any added values from right_node_list will + appear in their order, but after left_node_list in the returned list. + + node_list = delta(left_node_list, right_node_list); + + Calculates the delta (items not in both lists) between the two + node lists, removing duplicates and preserving ordering according + to left_node_list. Any added values from right_node_list will + appear in their order, but after left_node_list in the returned list. + + node_list = subtract(left_node_list, right_node_list); + + Removes any duplicates as well as items specified in right_node_list + from left_node_list. 
Example: + + all_nodes = nodes_online(); + allowed_nodes = subtract(nodes_online, node_to_avoid); + +Utility functions - Logging: + + debug(item1, item2, ...); LOG_DEBUG level + info(...); LOG_INFO level + notice(...); LOG_NOTICE level + warning(...); LOG_WARNING level + err(...); LOG_ERR level + crit(...); LOG_CRIT level + alert(...); LOG_ALERT level + emerg(...); LOG_EMERG level + + items - These can be strings, integer lists, or integers. Logging + string lists is not supported. + + level - the level is consistent with syslog(8) + + stop_processing(); + + Calling this function will prevent further event scripts from being + executed on a particular event. Call this script if, for example, + you do not wish for the default event handler to process the event. + + Note: This does NOT terminate the caller script; that is, the + script being executed will run to completion. + +Event scripts are written in a language called S-Lang; documentation specifics +about the language are available at http://www.s-lang.org + +Example script (creating a follows-but-avoid-after-start behavior): +% +% If the main queue server and replication queue server are on the same +% node, relocate the replication server somewhere else if possible. +% +define my_sap_event_trigger() +{ + variable state, owner_rep, owner_main; + variable nodes, allowed; + + % + % If this was a service event, don't execute the default event + % script trigger after this script completes. + % + if (event_type == EVENT_SERVICE) { + stop_processing(); + } + + (owner_main, state) = service_status("service:main_queue"); + (owner_rep, state) = service_status("service:replication_server"); + + if ((event_type == EVENT_NODE) and (owner_main == node_id) and + (node_state == NODE_OFFLINE) and (owner_rep >= 0)) { + % + % uh oh, the owner of the main server died. 
Restart it + % on the node running the replication server + % + notice("Starting Main Queue Server on node ", owner_rep); + ()=service_start("service:main_queue", owner_rep); + return; + } + + % + % S-Lang doesn't short-circuit prior to 2.1.0 + % + if ((owner_main >= 0) and + ((owner_main == owner_rep) or (owner_rep < 0))) { + + % + % Get all online nodes + % + nodes = nodes_online(); + + % + % Drop out the owner of the main server + % + allowed = subtract(nodes, owner_main); + if ((owner_rep >= 0) and (length(allowed) == 0)) { + % + % Only one node is online and the rep server is + % already running. Don't do anything else. + % + return; + } + + if ((length(allowed) == 0) and (owner_rep < 0)) { + % + % Only node online is the owner ... go ahead + % and start it, even though it doesn't increase + % availability to do so. + % + allowed = owner_main; + } + + % + % Move the replication server off the node that is + % running the main server if a node's available. + % + if (owner_rep >= 0) { + ()=service_stop("service:replication_server"); + } + ()=service_start("service:replication_server", allowed); + } + + return; +} + +my_sap_event_trigger(); + + +Relevant section from cluster.conf: + + + + + + + + + + + + + + + + + + + + + --- cluster/rgmanager/ChangeLog 2007/11/30 20:36:17 1.61 +++ cluster/rgmanager/ChangeLog 2007/11/30 21:36:28 1.62 @@ -1,8 +1,58 @@ 2007-11-30 Lon Hohberger - * src/resources/*: Merge misc. updates from RHEL5 branch. - * src/utils/*: Merge misc. updates from RHEL5 branch. - * include/*.h, src/daemons/*: Merge status-counter patch - from RHEL5 branch. + * Commit RIND / S-Lang script engine [untested] + +[RHEL5 merged ChangeLog Entries] +2007-11-30 Lon Hohberger + * src/resources/clusterfs.sh: Retry mount up to 3 times to avoid + race condition during another process mounting a GFS volume + * src/resources/vm.sh, service.sh: Add defaults for values. 
+ Make vm.sh work with more service attrs (max restarts) + * src/utils/clustat.c: Make output of clustat terminal-width + dependent + +2007-11-26 Lon Hohberger + * include/reslist.h: Add restart counters to resource node structure + (intended for top-level resources, i.e. services, vms...) + * include/restart_counter.h: Add header file for restart counter + * src/daemons/Makefile: Fix build to include restart counters + * src/daemons/restart_counter.c: Implement restart counters #247139 + * src/daemons/fo_domain.c, groups.c, restart_counter.c, resrules.c, + restree.c, test.c: Glue for restart counters. + * src/daemons/reslist.c: Glue for restart counters. Make expand_time + parser more robust to allow things like '1h30m' as a time value. + * src/daemons/main.c: Mark quorum disk offline in the correct + place to avoid extraneous log messages + * src/daemons/rg_state.c: Allow marking service as stopped if + stuck in recover state. Make service which failed to start + go to stopped state. Glue for restart counters. + * src/resources/service.sh, vm.sh: Add parameters for restart + counters #247139 + +2007-11-14 Lon Hohberger + * src/utils/clulog.c: Make clulog honor rgmanager log levels + (#289501) + * src/clulib/vft.c: Fix #303981 - crash on rgmanager restart in some + cases + * man/clusvcadm.8: Remove references to clushutdown from man page; + resolves #324151 + * src/resources/netfs.sh: Apply patch from Marco Ceci to fix #358161 + * src/resources/vm.sh: Make default migration policy live instead + of pause for Xen virtual machines. Also make it configurable instead + of static. 
Resolves #345871 + +2007-11-13 Lon Hohberger + * src/resources/clusterfs.sh: Add support for self_fence operation + to clusterfs resource agent + * src/resources/service.sh: Add default values to service.sh + +2007-10-26 Lon Hohberger + * src/daemons/main.c, src/utils/clustat.c, clusvcadm.c: + Fix #354391 + +2007-09-25 Lon Hohberger + * src/daemons/restree.c: Apply patch to fix side case re: 229650 + Patch from Simone Gotti. Resolves: #229650 +[End RHEL5 merged changes] 2007-08-30 Lon Hohberger * src/daemons/restree.c, rg_state.c: Fix tree-restart bug --- cluster/rgmanager/TODO 2006/07/19 18:43:32 1.8 +++ cluster/rgmanager/TODO 2007/11/30 21:36:28 1.9 @@ -1,5 +0,0 @@ -* Make live-migration of resources work; preferrably so that admins -can manually migrate Xen VMs to other nodes without telling the cluster -about it. That is, the cluster should be able to acquire running VMs -and update its state accordingly. -* Test against a working Xen build and shake out bugs /cvs/cluster/cluster/rgmanager/include/event.h,v --> standard output revision 1.1 --- cluster/rgmanager/include/event.h +++ - 2007-11-30 21:36:29.926075000 +0000 @@ -0,0 +1,145 @@ +/* + Copyright Red Hat, Inc. 2007 + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License version 2 as published + by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 675 Mass Ave, Cambridge, + MA 02139, USA. 
+*/ +#ifndef _EVENT_H +#define _EVENT_H + +/* 128 is a bit big, but it should be okay */ +typedef struct __rge_q { + char rg_name[128]; + uint32_t rg_state; + uint32_t pad1; + int rg_owner; + int rg_last_owner; +} group_event_t; + +typedef struct __ne_q { + int ne_local; + int ne_nodeid; + int ne_state; + int ne_clean; +} node_event_t; + +typedef struct __cfg_q { + int cfg_version; + int cfg_oldversion; +} config_event_t; + +typedef struct __user_q { + char u_name[128]; + msgctx_t *u_ctx; + int u_request; + int u_arg1; + int u_arg2; + int u_target; /* Node ID */ +} user_event_t; + +typedef enum { + EVENT_NONE=0, + EVENT_CONFIG, + EVENT_NODE, + EVENT_RG, + EVENT_USER +} event_type_t; + +/* Data that's distributed which indicates which + node is the event master */ +typedef struct __rgm { + uint32_t m_magic; + uint32_t m_nodeid; + uint64_t m_master_time; + uint8_t m_reserved[112]; +} event_master_t; + +#define swab_event_master_t(ptr) \ +{\ + swab32((ptr)->m_nodeid);\ + swab32((ptr)->m_magic);\ + swab64((ptr)->m_master_time);\ +} + +/* Just a magic # to help us ensure we've got good + data from VF */ +#define EVENT_MASTER_MAGIC 0xfabab0de + +/* Event structure - internal to the event subsystem; use + the queueing functions below which allocate this struct + and pass it to the event handler */ +typedef struct _event { + /* Not used dynamically - part of config info */ + list_head(); + char *ev_name; + char *ev_script; + char *ev_script_file; + int ev_prio; + int ev_pad; + /* --- end config part */ + int ev_type; /* config & generated by rgmanager*/ + int ev_transaction; + union { + group_event_t group; + node_event_t node; + config_event_t config; + user_event_t user; + } ev; +} event_t; + +#define EVENT_PRIO_COUNT 100 + +typedef struct _event_table { + int max_prio; + int pad; + event_t *entries[0]; +} event_table_t; + + +int construct_events(int ccsfd, event_table_t **); +void deconstruct_events(event_table_t **); +void print_events(event_table_t *); + +/* Does the 
event match a configured event? */ +int event_match(event_t *pattern, event_t *actual); + +/* Event queueing functions. */ +void node_event_q(int local, int nodeID, int state, int clean); +void rg_event_q(char *name, uint32_t state, int owner, int last); +void user_event_q(char *svc, int request, int arg1, int arg2, + int target, msgctx_t *ctx); +void config_event_q(int old_version, int new_version); + +/* Call this to see if there's a master. */ +int event_master_info_cached(event_master_t *); + +/* Call this to get the node ID of the current + master *or* become the master if none exists */ +int event_master(void); + +/* Setup */ +int central_events_enabled(void); +void set_central_events(int flag); +int slang_process_event(event_table_t *event_table, event_t *ev); + +/* For distributed events. */ +void set_transition_throttling(int nsecs); + +/* Simplified service start. */ +int service_op_start(char *svcName, int *target_list, int target_list_len, + int *new_owner); +int service_op_stop(char *svcName, int do_disable, int event_type); + + +#endif --- cluster/rgmanager/include/resgroup.h 2007/11/30 20:36:17 1.24 +++ cluster/rgmanager/include/resgroup.h 2007/11/30 21:36:28 1.25 @@ -67,9 +67,16 @@ #define RG_PORT 177 + +/* Constants moved to src/clulib/constants.c */ +/* DO NOT EDIT */ #define RG_MAGIC 0x11398fed #define RG_ACTION_REQUEST /* Message header */ 0x138582 +/* Argument to RG_ACTION_REQUEST */ +#define RG_ACTION_MASTER 0xfe0db143 +#define RG_ACTION_USER 0x3f173bfd +/* */ #define RG_EVENT 0x138583 /* Requests */ @@ -130,6 +137,7 @@ #define RG_FLAG_FROZEN (1<<0) /** Resource frozen */ const char *rg_state_str(int val); +int rg_state_str_to_id(const char *val); const char *rg_flags_str(char *flags_string, size_t size, int val, char *separator); const char *agent_op_str(int val); @@ -140,7 +148,7 @@ int group_op(char *rgname, int op); void rg_init(void); -/* FOOM */ +/* Basic service operations */ int svc_start(char *svcName, int req); int svc_stop(char 
*svcName, int error); int svc_status(char *svcName); @@ -157,7 +165,8 @@ int max, uint32_t target, int arg0, int arg1); void send_response(int ret, int node, request_t *req); -void send_ret(msgctx_t *ctx, char *name, int ret, int req); +void send_ret(msgctx_t *ctx, char *name, int ret, int orig_request, + int new_owner); /* do this op on all resource groups. The handler for the request will sort out whether or not it's a valid request given the state */ @@ -168,6 +177,7 @@ /* from rg_state.c */ int set_rg_state(char *name, rg_state_t *svcblk); int get_rg_state(char *servicename, rg_state_t *svcblk); +int get_rg_state_local(char *servicename, rg_state_t *svcblk); uint32_t best_target_node(cluster_member_list_t *allowed, uint32_t owner, char *rg_name, int lock); @@ -192,6 +202,10 @@ int my_id(void); /* Return codes */ +#define RG_EDOMAIN -15 /* Service not runnable given the + set of nodes and its failover + domain */ +#define RG_ESCRIPT -14 /* S/Lang script failed */ #define RG_EFENCE -13 /* Fencing operation pending */ #define RG_ENODE -12 /* Node is dead/nonexistent */ #define RG_EFROZEN -11 /* Service is frozen */ @@ -209,6 +223,7 @@ #define RG_YES 1 #define RG_NO 2 + const char *rg_strerror(int val); --- cluster/rgmanager/include/reslist.h 2007/11/30 20:36:17 1.24 +++ cluster/rgmanager/include/reslist.h 2007/11/30 21:36:28 1.25 @@ -202,6 +202,8 @@ void print_domains(fod_t **domains); int node_should_start(int nodeid, cluster_member_list_t *membership, char *rg_name, fod_t **domains); +int node_domain_set(fod_t *domain, int **ret, int *retlen); +int node_domain_set_safe(char *domainname, int **ret, int *retlen, int *flags); /* @@ -210,6 +212,7 @@ resource_t *find_resource_by_ref(resource_t **reslist, char *type, char *ref); resource_t *find_root_by_ref(resource_t **reslist, char *ref); resource_rule_t *find_rule_by_type(resource_rule_t **rulelist, char *type); +void res_build_name(char *, size_t, resource_t *); /* Internal functions; shouldn't be needed. 
--- cluster/rgmanager/include/restart_counter.h 2007/11/30 20:36:17 1.2 +++ cluster/rgmanager/include/restart_counter.h 2007/11/30 21:36:28 1.3 @@ -1,3 +1,22 @@ +/* + Copyright Red Hat, Inc. 2007 + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License version 2 as published + by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 675 Mass Ave, Cambridge, + MA 02139, USA. +*/ +/* Time-based restart counters for rgmanager */ + #ifndef _RESTART_COUNTER_H #define _RESTART_COUNTER_H --- cluster/rgmanager/include/rg_locks.h 2006/12/18 21:55:27 1.3 +++ cluster/rgmanager/include/rg_locks.h 2007/11/30 21:36:28 1.4 @@ -1,3 +1,20 @@ +/* + Copyright Red Hat, Inc. 2004-2007 + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License version 2 as published + by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 675 Mass Ave, Cambridge, + MA 02139, USA. 
+*/ #ifndef __RG_LOCKS_H #define __RG_LOCKS_H --- cluster/rgmanager/include/rg_queue.h 2006/07/19 18:43:32 1.6 +++ cluster/rgmanager/include/rg_queue.h 2007/11/30 21:36:28 1.7 @@ -1,3 +1,20 @@ +/* + Copyright Red Hat, Inc. 2004-2007 + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License version 2 as published + by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 675 Mass Ave, Cambridge, + MA 02139, USA. +*/ #ifndef _RG_QUEUE_H #define _RG_QUEUE_H #include @@ -19,7 +36,7 @@ uint32_t rr_target; /** Target node */ uint32_t rr_arg0; /** Integer argument */ uint32_t rr_arg1; /** Integer argument */ - uint32_t rr_arg3; /** Integer argument */ + uint32_t rr_arg2; /** Integer argument */ uint32_t rr_line; /** Line no */ msgctx_t * rr_resp_ctx; /** FD to send response */ char *rr_file; /** Who made req */ @@ -42,5 +59,7 @@ void rq_free(request_t *foo); void forward_request(request_t *req); +void forward_message(msgctx_t *ctx, void *msg, int nodeid); + #endif /cvs/cluster/cluster/rgmanager/src/clulib/sets.c,v --> standard output revision 1.1 --- cluster/rgmanager/src/clulib/sets.c +++ - 2007-11-30 21:36:30.630539000 +0000 @@ -0,0 +1,370 @@ +/* + Copyright Red Hat, Inc. 2007 + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License version 2 as published + by the Free Software Foundation. 
+ + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 675 Mass Ave, Cambridge, + MA 02139, USA. +*/ +/** + @file sets.c - Order-preserving set functions (union / intersection / delta) + (designed for integer types; a la int, uint64_t, etc...) + @author Lon Hohberger + */ +#include +#include +#include +#include +#include +#include + + +/** + Add a value to a set. This function disregards an add if the value is already + in the set. Note that the maximum length of set s must be preallocated; this + function doesn't do error or bounds checking. + + @param s Set to modify + @param curlen Current length (modified if added) + @param val Value to add + @return 0 if not added, 1 if added + */ +int +s_add(set_type_t *s, int *curlen, set_type_t val) +{ + int idx=0; + + for (; idx < *curlen; idx++) + if (s[idx] == val) + return 0; + s[*curlen] = val; + ++(*curlen); + return 1; +} + + +/** + Union-set function. Allocates and returns a new set which is the union of + the two given sets 'left' and 'right'. Also returns the new set length. + + @param left Left set - order is preserved on this set; that is, + this is the set where the caller cares about ordering. + @param ll Length of left set. + @param right Right set - order is not preserved on this set during + the union operation + @param rl Length of right set + @param ret Return set. Should * not * be preallocated. + @param retl Return set length. 
Should be ready to accept 1 integer + upon calling this function + @return 0 on success, -1 on error + */ +int +s_union(set_type_t *left, int ll, set_type_t *right, int rl, + set_type_t **ret, int *retl) +{ + int l, r, cnt = 0, total; + + total = ll + rl; /* Union will never exceed both sets */ + + *ret = malloc(sizeof(set_type_t)*total); + if (!*ret) { + return -1; + } + memset((void *)(*ret), 0, sizeof(set_type_t)*total); + + cnt = 0; + + /* Add all the ones on the left */ + for (l = 0; l < ll; l++) + s_add(*ret, &cnt, left[l]); + + /* Add the ones on the left */ + for (r = 0; r < rl; r++) + s_add(*ret, &cnt, right[r]); + + *retl = cnt; + + return 0; +} + + +/** + Intersection-set function. Allocates and returns a new set which is the + intersection of the two given sets 'left' and 'right'. Also returns the new + set length. + + @param left Left set - order is preserved on this set; that is, + this is the set where the caller cares about ordering. + @param ll Length of left set. + @param right Right set - order is not preserved on this set during + the union operation + @param rl Length of right set + @param ret Return set. Should * not * be preallocated. + @param retl Return set length. Should be ready to accept 1 integer + upon calling this function + @return 0 on success, -1 on error + */ +int +s_intersection(set_type_t *left, int ll, set_type_t *right, int rl, + set_type_t **ret, int *retl) +{ + int l, r, cnt = 0, total; + + total = ll; /* Intersection will never exceed one of the two set + sizes */ + + *ret = malloc(sizeof(set_type_t)*total); + if (!*ret) { + return -1; + } + memset((void *)(*ret), 0, sizeof(set_type_t)*total); + + cnt = 0; + /* Find duplicates */ + for (l = 0; l < ll; l++) { + for (r = 0; r < rl; r++) { + if (left[l] != right[r]) + continue; + if (s_add(*ret, &cnt, right[r])) + break; + } + } + + *retl = cnt; + return 0; +} + + +/** + Delta-set function. Allocates and returns a new set which is the delta (i.e. 
+ numbers not in both sets) of the two given sets 'left' and 'right'. Also + returns the new set length. + + @param left Left set - order is preserved on this set; that is, + this is the set where the caller cares about ordering. + @param ll Length of left set. + @param right Right set - order is not preserved on this set during + the union operation + @param rl Length of right set + @param ret Return set. Should * not * be preallocated. + @param retl Return set length. Should be ready to accept 1 integer + upon calling this function + @return 0 on success, -1 on error + */ +int +s_delta(set_type_t *left, int ll, set_type_t *right, int rl, + set_type_t **ret, int *retl) +{ + int l, r, cnt = 0, total, found; + + total = ll + rl; /* Union will never exceed both sets */ + + *ret = malloc(sizeof(set_type_t)*total); + if (!*ret) { + return -1; + } + memset((void *)(*ret), 0, sizeof(set_type_t)*total); + + cnt = 0; + + /* not efficient, but it works */ + /* Add all the ones on the left */ + for (l = 0; l < ll; l++) { + found = 0; + for (r = 0; r < rl; r++) { + if (right[r] == left[l]) { + found = 1; + break; + } + } + + if (found) + continue; + s_add(*ret, &cnt, left[l]); + } + + + /* Add all the ones on the right*/ + for (r = 0; r < rl; r++) { + found = 0; + for (l = 0; l < ll; l++) { + if (right[r] == left[l]) { + found = 1; + break; + } + } + + if (found) + continue; + s_add(*ret, &cnt, right[r]); + } + + *retl = cnt; + + return 0; +} + + +/** + Subtract-set function. Allocates and returns a new set which is the + subtraction of the right set from the left set. + Also returns the new set length. + + @param left Left set - order is preserved on this set; that is, + this is the set where the caller cares about ordering. + @param ll Length of left set. + @param right Right set - order is not preserved on this set during + the union operation + @param rl Length of right set + @param ret Return set. Should * not * be preallocated. + @param retl Return set length. 
Should be ready to accept 1 integer + upon calling this function + @return 0 on success, -1 on error + */ +int +s_subtract(set_type_t *left, int ll, set_type_t *right, int rl, + set_type_t **ret, int *retl) +{ + int l, r, cnt = 0, total, found; + + total = ll; /* Union will never exceed left set length*/ + + *ret = malloc(sizeof(set_type_t)*total); + if (!*ret) { + return -1; + } + memset((void *)(*ret), 0, sizeof(set_type_t)*total); + + cnt = 0; + + /* not efficient, but it works */ + for (l = 0; l < ll; l++) { + found = 0; + for (r = 0; r < rl; r++) { + if (right[r] == left[l]) { + found = 1; + break; + } + } + + if (found) + continue; + s_add(*ret, &cnt, left[l]); + } + + *retl = cnt; + + return 0; +} + + +/** + Shuffle-set function. Weakly randomizes ordering of a set in-place. + + @param set Set to randomize + @param sl Length of set + @return 0 + */ +int +s_shuffle(set_type_t *set, int sl) +{ + int x, newidx; + unsigned r_state = 0; + set_type_t t; + struct timeval tv; + + gettimeofday(&tv, NULL); + r_state = (int)(tv.tv_usec); + + for (x = 0; x < sl; x++) { + newidx = (rand_r(&r_state) % sl); + if (newidx == x) + continue; + t = set[x]; + set[x] = set[newidx]; + set[newidx] = t; + } + + return 0; +} + + +#ifdef STANDALONE +/* Testbed */ +/* + gcc -o sets sets.c -DSTANDALONE -ggdb -I../../include \ + -Wall -Werror -Wstrict-prototypes -Wextra + */ +int +main(int __attribute__ ((unused)) argc, char __attribute__ ((unused)) **argv) +{ + set_type_t a[] = { 1, 2, 3, 3, 3, 2, 2, 3 }; + set_type_t b[] = { 2, 3, 4 }; + set_type_t *i; + int ilen = 0, x; + + s_union(a, 8, b, 3, &i, &ilen); + + /* Should return length of 4 - { 1 2 3 4 } */ + printf("set_union [%d] = ", ilen); + for ( x = 0; x < ilen; x++) { + printf("%d ", (int)i[x]); + } + printf("\n"); + + s_shuffle(i, ilen); + printf("shuffled [%d] = ", ilen); + for ( x = 0; x < ilen; x++) { + printf("%d ", (int)i[x]); + } + printf("\n"); + + + free(i); + + /* Should return length of 2 - { 2 3 } */ + 
s_intersection(a, 8, b, 3, &i, &ilen); + + printf("set_intersection [%d] = ", ilen); + for ( x = 0; x < ilen; x++) { + printf("%d ", (int)i[x]); + } + printf("\n"); + + free(i); + + /* Should return length of 2 - { 1 4 } */ + s_delta(a, 8, b, 3, &i, &ilen); + + printf("set_delta [%d] = ", ilen); + for ( x = 0; x < ilen; x++) { + printf("%d ", (int)i[x]); + } + printf("\n"); + + free(i); + + /* Should return length of 1 - { 1 } */ + s_subtract(a, 8, b, 3, &i, &ilen); + + printf("set_subtract [%d] = ", ilen); + for ( x = 0; x < ilen; x++) { + printf("%d ", (int)i[x]); + } + printf("\n"); + + free(i); + + + return 0; +} +#endif --- cluster/rgmanager/src/clulib/Makefile 2007/11/12 08:17:00 1.18 +++ cluster/rgmanager/src/clulib/Makefile 2007/11/30 21:36:28 1.19 @@ -19,7 +19,7 @@ OBJS1= clulog.o daemon_init.o signals.o msgsimple.o \ gettid.o rg_strings.o message.o members.o fdops.o \ lock.o cman.o vft.o msg_cluster.o msg_socket.o \ - wrap_lock.o + wrap_lock.o sets.o OBJS2= alloc.o --- cluster/rgmanager/src/clulib/members.c 2006/09/27 16:28:41 1.4 +++ cluster/rgmanager/src/clulib/members.c 2007/11/30 21:36:28 1.5 @@ -233,6 +233,50 @@ int +member_low_id(void) +{ + int x = 0, low = -1; + + pthread_rwlock_wrlock(&memblock); + if (!membership) { + pthread_rwlock_unlock(&memblock); + return low; + } + + for (x = 0; x < membership->cml_count; x++) { + if ((membership->cml_members[x].cn_member) && + ((membership->cml_members[x].cn_nodeid < low) || (low == -1))) + low = membership->cml_members[x].cn_nodeid; + } + pthread_rwlock_unlock(&memblock); + + return low; +} + + +int +member_high_id(void) +{ + int x = 0, high = -1; + + pthread_rwlock_wrlock(&memblock); + if (!membership) { + pthread_rwlock_unlock(&memblock); + return high; + } + + for (x = 0; x < membership->cml_count; x++) { + if (membership->cml_members[x].cn_member && + (membership->cml_members[x].cn_nodeid > high)) + high = membership->cml_members[x].cn_nodeid; + } + pthread_rwlock_unlock(&memblock); + + return high; 
+} + + +int member_online(int nodeid) { int x = 0, ret = 0; --- cluster/rgmanager/src/clulib/rg_strings.c 2007/07/31 18:00:25 1.10 +++ cluster/rgmanager/src/clulib/rg_strings.c 2007/11/30 21:36:28 1.11 @@ -26,6 +26,8 @@ const struct string_val rg_error_strings[] = { + { RG_EDOMAIN, "Service not runnable" }, + { RG_ESCRIPT, "S/Lang Script Error" }, { RG_EFENCE, "Fencing operation pending; try again later" }, { RG_ENODE, "Target node dead / nonexistent" }, { RG_ERUN, "Service is already running" }, @@ -147,6 +149,21 @@ return "Unknown"; } +static inline int +rg_search_table_by_str(const struct string_val *table, const char *val) +{ + int x; + + for (x = 0; table[x].str != NULL; x++) { + if (!strcasecmp(table[x].str, val)) + return table[x].val; + } + + return -1; +} + + + const char * rg_strerror(int val) { @@ -159,6 +176,14 @@ return rg_search_table(rg_state_strings, val); } +int +rg_state_str_to_id(const char *val) +{ + return rg_search_table_by_str(rg_state_strings, val); +} + + + const char * rg_flags_str(char *flags_string, size_t size, int val, char *separator) { --- cluster/rgmanager/src/clulib/vft.c 2007/11/30 21:01:27 1.22 +++ cluster/rgmanager/src/clulib/vft.c 2007/11/30 21:36:28 1.23 @@ -1734,55 +1734,52 @@ } msg_close(&ctx); msg = (vf_msg_t *)gh; - break; - } - - if (x >= membership->cml_count) - return VFR_ERROR; - - /* Uh oh */ - if (!msg || (msg == &rmsg)) { - printf("VF: No valid message\n"); - return VFR_ERROR; - } - - swab_generic_msg_hdr(&(msg->vm_hdr)); - if (msg->vm_hdr.gh_command == VF_NACK) { - free(msg); - return VFR_NODATA; - } - if (msg->vm_hdr.gh_length < sizeof(vf_msg_t)) { - fprintf(stderr, "VF: Short reply from %d\n", x); - free(msg); - return VFR_ERROR; - } - - if (msg->vm_hdr.gh_length > n) { - fprintf(stderr,"VF: Size mismatch during decode (%d > %d)\n", - msg->vm_hdr.gh_length, n); - free(msg); - return VFR_ERROR; - } + /* Uh oh */ + if (!msg || (msg == &rmsg)) { + printf("VF: No valid message\n"); + return VFR_ERROR; + } + 
swab_generic_msg_hdr(&(msg->vm_hdr)); + if (msg->vm_hdr.gh_command == VF_NACK) { + free(msg); + continue; + } + if (msg->vm_hdr.gh_length < sizeof(vf_msg_t)) { + fprintf(stderr, "VF: Short reply from %d\n", x); + free(msg); + continue; + } + if (msg->vm_hdr.gh_length > n) { + fprintf(stderr, + "VF: Size mismatch during decode (%d > %d)\n", + msg->vm_hdr.gh_length, n); + free(msg); + continue; + } - swab_vf_msg_info_t(&(msg->vm_msg)); + swab_vf_msg_info_t(&(msg->vm_msg)); - if (msg->vm_msg.vf_datalen != (n - sizeof(*msg))) { - fprintf(stderr,"VF: Size mismatch during decode (\n"); - free(msg); - return VFR_ERROR; - } + if (msg->vm_msg.vf_datalen != (n - sizeof(*msg))) { + fprintf(stderr,"VF: Size mismatch during decode (\n"); + free(msg); + continue; + } - if (vf_set_current(keyid, msg->vm_msg.vf_view, + /* Ok... we've got data! */ + if (vf_set_current(keyid, msg->vm_msg.vf_view, msg->vm_msg.vf_data, msg->vm_msg.vf_datalen) == VFR_ERROR) { + free(msg); + return VFR_ERROR; + } + free(msg); - return VFR_ERROR; - } - free(msg); + return VFR_OK; + } - return VFR_OK; + return VFR_NODATA; } /cvs/cluster/cluster/rgmanager/src/daemons/event_config.c,v --> standard output revision 1.1 --- cluster/rgmanager/src/daemons/event_config.c +++ - 2007-11-30 21:36:31.406600000 +0000 @@ -0,0 +1,541 @@ +/** + Copyright Red Hat, Inc. 2002-2007 + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License version 2 as published + by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 675 Mass Ave, Cambridge, + MA 02139, USA. 
+*/ +/** @file + * CCS event parsing, based on failover domain parsing + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CONFIG_NODE_ID_TO_NAME \ + "/cluster/clusternodes/clusternode[@nodeid=\"%d\"]/@name" +#define CONFIG_NODE_NAME_TO_ID \ + "/cluster/clusternodes/clusternode[@name=\"%s\"]/@nodeid" + +void deconstruct_events(event_table_t **); +void print_event(event_t *ev); + +//#define DEBUG + +#ifdef DEBUG +#define ENTER() clulog(LOG_DEBUG, "ENTER: %s\n", __FUNCTION__) +#define RETURN(val) {\ + clulog(LOG_DEBUG, "RETURN: %s line=%d value=%d\n", __FUNCTION__, \ + __LINE__, (val));\ + return(val);\ +} +#else +#define ENTER() +#define RETURN(val) return(val) +#endif + +#ifdef NO_CCS +#define ccs_get(fd, query, ret) conf_get(query, ret) +#endif + +/* + + + slang_script_stuff(); + start_service(); + + + */ +int +event_match(event_t *pattern, event_t *actual) +{ + if (pattern->ev_type != EVENT_NONE && + actual->ev_type != pattern->ev_type) + return 0; + + /* If there's no event class specified, the rest is + irrelevant */ + if (pattern->ev_type == EVENT_NONE) + return 1; + + switch(pattern->ev_type) { + case EVENT_NODE: + if (pattern->ev.node.ne_nodeid >= 0 && + actual->ev.node.ne_nodeid != + pattern->ev.node.ne_nodeid) { + return 0; + } + if (pattern->ev.node.ne_local >= 0 && + actual->ev.node.ne_local != + pattern->ev.node.ne_local) { + return 0; + } + if (pattern->ev.node.ne_state >= 0 && + actual->ev.node.ne_state != + pattern->ev.node.ne_state) { + return 0; + } + if (pattern->ev.node.ne_clean >= 0 && + actual->ev.node.ne_clean != + pattern->ev.node.ne_clean) { + return 0; + } + return 1; /* All specified params match */ + case EVENT_RG: + if (pattern->ev.group.rg_name[0] && + strcasecmp(actual->ev.group.rg_name, + pattern->ev.group.rg_name)) { + return 0; + } + if (pattern->ev.group.rg_state != (uint32_t)-1 && + actual->ev.group.rg_state != + 
pattern->ev.group.rg_state) { + return 0; + } + if (pattern->ev.group.rg_owner >= 0 && + actual->ev.group.rg_owner != + pattern->ev.group.rg_owner) { + return 0; + } + return 1; + case EVENT_CONFIG: + if (pattern->ev.config.cfg_version >= 0 && + actual->ev.config.cfg_version != + pattern->ev.config.cfg_version) { + return 0; + } + if (pattern->ev.config.cfg_oldversion >= 0 && + actual->ev.config.cfg_oldversion != + pattern->ev.config.cfg_oldversion) { + return 0; + } + return 1; + case EVENT_USER: + if (pattern->ev.user.u_name[0] && + strcasecmp(actual->ev.user.u_name, + pattern->ev.user.u_name)) { + return 0; + } + if (pattern->ev.user.u_request != 0 && + actual->ev.user.u_request != + pattern->ev.user.u_request) { + return 0; + } + if (pattern->ev.user.u_target != 0 && + actual->ev.user.u_target != + pattern->ev.user.u_target) { + return 0; + } + return 1; + default: + break; + } + + return 0; +} + + +char * +ccs_node_id_to_name(int ccsfd, int nodeid) +{ + char xpath[256], *ret = 0; + + snprintf(xpath, sizeof(xpath), CONFIG_NODE_ID_TO_NAME, + nodeid); + if (ccs_get(ccsfd, xpath, &ret) == 0) + return ret; + return NULL; +} + + +int +ccs_node_name_to_id(int ccsfd, char *name) +{ + char xpath[256], *ret = 0; + int rv = 0; + + snprintf(xpath, sizeof(xpath), CONFIG_NODE_NAME_TO_ID, + name); + if (ccs_get(ccsfd, xpath, &ret) == 0) { + rv = atoi(ret); + free(ret); + return rv; + } + return 0; +} + + +static void +deconstruct_event(event_t *ev) +{ + if (ev->ev_script) + free(ev->ev_script); + if (ev->ev_name) + free(ev->ev_name); + free(ev); +} + + +static int +get_node_event(int ccsfd, char *base, event_t *ev) +{ + char xpath[256], *ret = NULL; + + /* Clear out the possibilitiies */ + ev->ev.node.ne_nodeid = -1; + ev->ev.node.ne_local = -1; + ev->ev.node.ne_state = -1; + ev->ev.node.ne_clean = -1; + + snprintf(xpath, sizeof(xpath), "%s/@node_id", base); + if (ccs_get(ccsfd, xpath, &ret) == 0) { + ev->ev.node.ne_nodeid = atoi(ret); + free(ret); + if 
(ev->ev.node.ne_nodeid <= 0) + return -1; + } else { + /* See if there's a node name */ + snprintf(xpath, sizeof(xpath), "%s/@node", base); + if (ccs_get(ccsfd, xpath, &ret) == 0) { + ev->ev.node.ne_nodeid = + ccs_node_name_to_id(ccsfd, ret); + free(ret); + if (ev->ev.node.ne_nodeid <= 0) + return -1; + } + } + + snprintf(xpath, sizeof(xpath), "%s/@node_state", base); + if (ccs_get(ccsfd, xpath, &ret) == 0) { + if (!strcasecmp(ret, "up")) { + ev->ev.node.ne_state = 1; + } else if (!strcasecmp(ret, "down")) { + ev->ev.node.ne_state = 0; + } else { + ev->ev.node.ne_state = !!atoi(ret); + } + free(ret); + } + + snprintf(xpath, sizeof(xpath), "%s/@node_clean", base); + if (ccs_get(ccsfd, xpath, &ret) == 0) { + ev->ev.node.ne_clean = !!atoi(ret); + free(ret); + } + + snprintf(xpath, sizeof(xpath), "%s/@node_local", base); + if (ccs_get(ccsfd, xpath, &ret) == 0) { + ev->ev.node.ne_local = !!atoi(ret); + free(ret); + } + + return 0; +} + + +static int +get_rg_event(int ccsfd, char *base, event_t *ev) +{ + char xpath[256], *ret = NULL; + + /* Clear out the possibilitiies */ + ev->ev.group.rg_name[0] = 0; + ev->ev.group.rg_state = (uint32_t)-1; + ev->ev.group.rg_owner = -1; + + snprintf(xpath, sizeof(xpath), "%s/@service", base); + if (ccs_get(ccsfd, xpath, &ret) == 0) { + strncpy(ev->ev.group.rg_name, ret, + sizeof(ev->ev.group.rg_name)); + free(ret); + if (!strlen(ev->ev.group.rg_name)) { + return -1; + } + } + + snprintf(xpath, sizeof(xpath), "%s/@service_state", base); + if (ccs_get(ccsfd, xpath, &ret) == 0) { + if (!isdigit(ret[0])) { + ev->ev.group.rg_state = + rg_state_str_to_id(ret); + } else { + ev->ev.group.rg_state = atoi(ret); + } + free(ret); + } + + snprintf(xpath, sizeof(xpath), "%s/@service_owner", base); + if (ccs_get(ccsfd, xpath, &ret) == 0) { + if (!isdigit(ret[0])) { + ev->ev.group.rg_owner = + ccs_node_name_to_id(ccsfd, ret); + } else { + ev->ev.group.rg_owner = !!atoi(ret); + } + free(ret); + } + + return 0; +} + + +static int +get_config_event(int 
__attribute__((unused)) ccsfd, + char __attribute__((unused)) *base, + event_t __attribute__((unused)) *ev) +{ + errno = ENOSYS; + return -1; +} + + +static event_t * +get_event(int ccsfd, char *base, int idx, int *_done) +{ + event_t *ev; + char xpath[256]; + char *ret = NULL; + + *_done = 0; + snprintf(xpath, sizeof(xpath), "%s/event[%d]/@name", + base, idx); + if (ccs_get(ccsfd, xpath, &ret) != 0) { + *_done = 1; + return NULL; + } + + ev = malloc(sizeof(*ev)); + if (!ev) + return NULL; + memset(ev, 0, sizeof(*ev)); + ev->ev_name = ret; + + /* Get the script file / inline from config */ + ret = NULL; + snprintf(xpath, sizeof(xpath), "%s/event[%d]/@file", + base, idx); + if (ccs_get(ccsfd, xpath, &ret) == 0) { + ev->ev_script_file = ret; + } else { + snprintf(xpath, sizeof(xpath), "%s/event[%d]", + base, idx); + if (ccs_get(ccsfd, xpath, &ret) == 0) { + ev->ev_script = ret; + } else { + goto out_fail; + } + } + + /* Get the priority ordering (must be nonzero) */ + ev->ev_prio = 99; + ret = NULL; + snprintf(xpath, sizeof(xpath), "%s/event[%d]/@priority", + base, idx); + if (ccs_get(ccsfd, xpath, &ret) == 0) { + ev->ev_prio = atoi(ret); + if (ev->ev_prio <= 0 || ev->ev_prio > EVENT_PRIO_COUNT) { + clulog(LOG_ERR, + "event %s: priority %s invalid\n", + ev->ev_name, ret); + goto out_fail; + } + free(ret); + } + + /* Get the event class */ + snprintf(xpath, sizeof(xpath), "%s/event[%d]/@class", + base, idx); + ret = NULL; + if (ccs_get(ccsfd, xpath, &ret) == 0) { + snprintf(xpath, sizeof(xpath), "%s/event[%d]", + base, idx); + if (!strcasecmp(ret, "node")) { + ev->ev_type = EVENT_NODE; + if (get_node_event(ccsfd, xpath, ev) < 0) + goto out_fail; + } else if (!strcasecmp(ret, "service") || + !strcasecmp(ret, "resource") || + !strcasecmp(ret, "rg") ) { + ev->ev_type = EVENT_RG; + if (get_rg_event(ccsfd, xpath, ev) < 0) + goto out_fail; + } else if (!strcasecmp(ret, "config") || + !strcasecmp(ret, "reconfig")) { + ev->ev_type = EVENT_CONFIG; + if (get_config_event(ccsfd, 
xpath, ev) < 0) + goto out_fail; + } else { + clulog(LOG_ERR, + "event %s: class %s unrecognized\n", + ev->ev_name, ret); + goto out_fail; + } + + free(ret); + ret = NULL; + } + + return ev; +out_fail: + if (ret) + free(ret); + deconstruct_event(ev); + return NULL; +} + + +static event_t * +get_default_event(void) +{ + event_t *ev; + char xpath[1024]; + + ev = malloc(sizeof(*ev)); + if (!ev) + return NULL; + memset(ev, 0, sizeof(*ev)); + ev->ev_name = strdup("Default"); + + /* Get the script file / inline from config */ + snprintf(xpath, sizeof(xpath), "%s/default_event_script.sl", + RESOURCE_ROOTDIR); + + ev->ev_prio = 100; + ev->ev_type = EVENT_NONE; + ev->ev_script_file = strdup(xpath); + if (!ev->ev_script_file || ! ev->ev_name) { + deconstruct_event(ev); + return NULL; + } + + return ev; +} + + +/** + * similar API to failover domain + */ +int +construct_events(int ccsfd, event_table_t **events) +{ + char xpath[256]; + event_t *ev; + int x = 1, done = 0; + + /* Allocate the event list table */ + *events = malloc(sizeof(event_table_t) + + sizeof(event_t) * (EVENT_PRIO_COUNT+1)); + if (!*events) + return -1; + memset(*events, 0, sizeof(event_table_t) + + sizeof(event_t) * (EVENT_PRIO_COUNT+1)); + (*events)->max_prio = EVENT_PRIO_COUNT; + + snprintf(xpath, sizeof(xpath), + RESOURCE_TREE_ROOT "/events"); + + do { + ev = get_event(ccsfd, xpath, x++, &done); + if (ev) + list_insert(&((*events)->entries[ev->ev_prio]), ev); + } while (!done); + + ev = get_default_event(); + if (ev) + list_insert(&((*events)->entries[ev->ev_prio]), ev); + + return 0; +} + + +void +print_event(event_t *ev) +{ + printf(" Name: %s\n", ev->ev_name); + + switch(ev->ev_type) { + case EVENT_NODE: + printf(" Node %d State %d\n", ev->ev.node.ne_nodeid, + ev->ev.node.ne_state); + break; + case EVENT_RG: + printf(" RG %s State %s\n", ev->ev.group.rg_name, + rg_state_str(ev->ev.group.rg_state)); + break; + case EVENT_CONFIG: + printf(" Config change - unsupported\n"); + break; + default: + 
printf(" (Any event)\n"); + break; + } + + if (ev->ev_script) { + printf(" Inline script.\n"); + } else { + printf(" File: %s\n", ev->ev_script_file); + } +} + + +void +print_events(event_table_t *events) +{ + int x, y; + event_t *ev; + + for (x = 0; x <= events->max_prio; x++) { + if (!events->entries[x]) + continue; + printf("Event Priority Level %d:\n", x); + list_for(&(events->entries[x]), ev, y) { + print_event(ev); + } + } +} + + +void +deconstruct_events(event_table_t **eventsp) +{ + int x; + event_table_t *events = *eventsp; + event_t *ev = NULL; + + if (!events) + return; + + for (x = 0; x <= events->max_prio; x++) { + while ((ev = (events->entries[x]))) { + list_remove(&(events->entries[x]), ev); + deconstruct_event(ev); + } + } + + free(events); + *eventsp = NULL; +} + + /cvs/cluster/cluster/rgmanager/src/daemons/service_op.c,v --> standard output revision 1.1 --- cluster/rgmanager/src/daemons/service_op.c +++ - 2007-11-30 21:36:31.603381000 +0000 @@ -0,0 +1,189 @@ +/* + Copyright Red Hat, Inc. 2007 + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License version 2 as published + by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 675 Mass Ave, Cambridge, + MA 02139, USA. +*/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* + * Send a message to the target node to start the service. 
+ */ +int svc_start_remote(char *svcName, int request, uint32_t target); +void svc_report_failure(char *); +int get_service_state_internal(char *svcName, rg_state_t *svcStatus); + + +/** + * + */ +int +service_op_start(char *svcName, + int *target_list, + int target_list_len, + int *new_owner) +{ + int target; + int ret, x; + rg_state_t svcStatus; + + if (get_service_state_internal(svcName, &svcStatus) < 0) { + return RG_EFAIL; + } + + if (svcStatus.rs_state == RG_STATE_FAILED || + svcStatus.rs_state == RG_STATE_UNINITIALIZED) + return RG_EINVAL; + + for (x = 0; x < target_list_len; x++) { + + target = target_list[x]; + ret = svc_start_remote(svcName, RG_START_REMOTE, + target); + switch (ret) { + case RG_ERUN: + /* Someone stole the service while we were + trying to start it */ + get_rg_state_local(svcName, &svcStatus); + if (new_owner) + *new_owner = svcStatus.rs_owner; + return 0; + case RG_EDEPEND: + case RG_EFAIL: + continue; + case RG_EABORT: + svc_report_failure(svcName); + return RG_EFAIL; + default: + /* deliberate fallthrough */ + clulog(LOG_ERR, + "#61: Invalid reply from member %d during" + " start operation!\n", target); + case RG_NO: + /* state uncertain */ + clulog(LOG_CRIT, "State Uncertain: svc:%s " + "nid:%d req:%s ret:%d\n", svcName, + target, rg_req_str(RG_START_REMOTE), ret); + return 0; + case 0: + if (new_owner) + *new_owner = target; + clulog(LOG_NOTICE, "Service %s is now running " + "on member %d\n", svcName, (int)target); + return 0; + } + } + + return RG_EFAIL; +} + + +int +service_op_stop(char *svcName, int do_disable, int event_type) +{ + SmMessageSt msg; + int msg_ret; + msgctx_t ctx; + rg_state_t svcStatus; + int msgtarget = my_id(); + + /* Build the message header */ + msg.sm_hdr.gh_magic = GENERIC_HDR_MAGIC; + msg.sm_hdr.gh_command = RG_ACTION_REQUEST; + msg.sm_hdr.gh_arg1 = RG_ACTION_MASTER; + msg.sm_hdr.gh_length = sizeof (SmMessageSt); + + msg.sm_data.d_action = ((!do_disable) ? 
RG_STOP:RG_DISABLE); + + if (msg.sm_data.d_action == RG_STOP && event_type == EVENT_USER) + msg.sm_data.d_action = RG_STOP_USER; + + strncpy(msg.sm_data.d_svcName, svcName, + sizeof(msg.sm_data.d_svcName)); + msg.sm_data.d_ret = 0; + msg.sm_data.d_svcOwner = 0; + + /* Open a connection to the local node - it will decide what to + do in this case. XXX inefficient; should queue requests + locally and immediately forward requests otherwise */ + + if (get_service_state_internal(svcName, &svcStatus) < 0) + return RG_EFAIL; + if (svcStatus.rs_owner > 0) + msgtarget = svcStatus.rs_owner; + + if (msg_open(MSG_CLUSTER, msgtarget, RG_PORT, &ctx, 2)< 0) { + clulog(LOG_ERR, + "#58: Failed opening connection to member #%d\n", + my_id()); + return -1; + } + + /* Encode */ + swab_SmMessageSt(&msg); + + /* Send stop message to the other node */ + if (msg_send(&ctx, &msg, sizeof (SmMessageSt)) < + (int)sizeof (SmMessageSt)) { + clulog(LOG_ERR, "Failed to send complete message\n"); + msg_close(&ctx); + return -1; + } + + /* Check the response */ + do { + msg_ret = msg_receive(&ctx, &msg, + sizeof (SmMessageSt), 10); + if ((msg_ret == -1 && errno != ETIMEDOUT) || + (msg_ret >= 0)) { + break; + } + } while(1); + + if (msg_ret != sizeof (SmMessageSt)) { + clulog(LOG_WARNING, "Strange response size: %d vs %d\n", + msg_ret, (int)sizeof(SmMessageSt)); + return 0; /* XXX really UNKNOWN */ + } + + /* Got a valid response from other node. */ + msg_close(&ctx); + + /* Decode */ + swab_SmMessageSt(&msg); + + return msg.sm_data.d_ret; +} + + +/* + TODO + service_op_migrate() + */ + /cvs/cluster/cluster/rgmanager/src/daemons/slang_event.c,v --> standard output revision 1.1 --- cluster/rgmanager/src/daemons/slang_event.c +++ - 2007-11-30 21:36:31.687551000 +0000 @@ -0,0 +1,1228 @@ +/* + Copyright Red Hat, Inc. 
2007 + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License version 2 as published + by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 675 Mass Ave, Cambridge, + MA 02139, USA. +*/ +/** + @file S/Lang event handling & intrinsic functions + vars + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +static int __sl_initialized = 0; + +static char **_service_list = NULL; +static int _service_list_len = 0; + +char **get_service_names(int *len); /* from groups.c */ +int get_service_property(char *rg_name, char *prop, char *buf, size_t buflen); +void push_int_array(int *stuff, int len); + + +/* ================================================================ + * Node states + * ================================================================ */ +static const int + _ns_online = 1, + _ns_offline = 0; + +/* ================================================================ + * Event information + * ================================================================ */ +static const int + _ev_none = EVENT_NONE, + _ev_node = EVENT_NODE, + _ev_service = EVENT_RG, + _ev_config = EVENT_CONFIG, + _ev_user = EVENT_USER; + +static const int + _rg_fail = RG_EFAIL, + _rg_success = RG_ESUCCESS, + _rg_edomain = RG_EDOMAIN, + _rg_edepend = RG_EDEPEND, + _rg_eabort = RG_EABORT, + _rg_einval = RG_EINVAL, + _rg_erun = RG_ERUN; + +static int + _stop_processing = 0, + _my_node_id = 0, + _node_state = 0, + _node_id = 0, 
+ _node_clean = 0, + _service_owner = 0, + _service_last_owner = 0, + _user_request = 0, + _user_arg1 = 0, + _user_arg2 = 0, + _user_return = 0, + _rg_err = 0, + _event_type = 0; + +static char + *_node_name = NULL, + *_service_name = NULL, + *_service_state = NULL, + *_rg_err_str = "No Error"; + +static int + _user_enable = RG_ENABLE, + _user_disable = RG_DISABLE, + _user_stop = RG_STOP_USER, /* From clusvcadm */ + _user_relo = RG_RELOCATE, + _user_restart = RG_RESTART, + _user_migrate = RG_MIGRATE; + + +SLang_Intrin_Var_Type rgmanager_vars[] = +{ + /* Log levels (constants) */ + + /* Node state information */ + MAKE_VARIABLE("NODE_ONLINE", &_ns_online, SLANG_INT_TYPE, 1), + MAKE_VARIABLE("NODE_OFFLINE", &_ns_offline, SLANG_INT_TYPE, 1), + + /* Node event information */ + MAKE_VARIABLE("node_self", &_my_node_id, SLANG_INT_TYPE, 1), + MAKE_VARIABLE("node_state", &_node_state, SLANG_INT_TYPE, 1), + MAKE_VARIABLE("node_id", &_node_id, SLANG_INT_TYPE, 1), + MAKE_VARIABLE("node_name", &_node_name, SLANG_STRING_TYPE,1), + MAKE_VARIABLE("node_clean", &_node_clean, SLANG_INT_TYPE, 1), + + /* Service event information */ + MAKE_VARIABLE("service_name", &_service_name, SLANG_STRING_TYPE,1), + MAKE_VARIABLE("service_state", &_service_state,SLANG_STRING_TYPE,1), + MAKE_VARIABLE("service_owner", &_service_owner,SLANG_INT_TYPE, 1), + MAKE_VARIABLE("service_last_owner", &_service_last_owner, + SLANG_INT_TYPE, 1), + + /* User event information */ + MAKE_VARIABLE("user_request", &_user_request, SLANG_INT_TYPE,1), + MAKE_VARIABLE("user_arg1", &_user_arg1, SLANG_INT_TYPE,1), + MAKE_VARIABLE("user_arg2", &_user_arg2, SLANG_INT_TYPE,1), + MAKE_VARIABLE("user_service", &_service_name, SLANG_STRING_TYPE,1), + MAKE_VARIABLE("user_target", &_service_owner,SLANG_INT_TYPE, 1), + /* Return code to user requests; i.e. 
clusvcadm */ + MAKE_VARIABLE("user_return", &_user_return, SLANG_INT_TYPE, 0), + + /* General event information */ + MAKE_VARIABLE("event_type", &_event_type, SLANG_INT_TYPE, 1), + MAKE_VARIABLE("EVENT_NONE", &_ev_none, SLANG_INT_TYPE, 1), + MAKE_VARIABLE("EVENT_NODE", &_ev_node, SLANG_INT_TYPE, 1), + MAKE_VARIABLE("EVENT_CONFIG", &_ev_config, SLANG_INT_TYPE, 1), + MAKE_VARIABLE("EVENT_SERVICE", &_ev_service, SLANG_INT_TYPE, 1), + MAKE_VARIABLE("EVENT_USER", &_ev_user, SLANG_INT_TYPE, 1), + + /* User request constants */ + MAKE_VARIABLE("USER_ENABLE", &_user_enable, SLANG_INT_TYPE, 1), + MAKE_VARIABLE("USER_DISABLE", &_user_disable, SLANG_INT_TYPE, 1), + MAKE_VARIABLE("USER_STOP", &_user_stop, SLANG_INT_TYPE, 1), + MAKE_VARIABLE("USER_RELOCATE", &_user_relo, SLANG_INT_TYPE, 1), + MAKE_VARIABLE("USER_RESTART", &_user_restart, SLANG_INT_TYPE, 1), + MAKE_VARIABLE("USER_MIGRATE", &_user_migrate, SLANG_INT_TYPE, 1), + + /* Errors */ + MAKE_VARIABLE("rg_error", &_rg_err, SLANG_INT_TYPE, 1), + MAKE_VARIABLE("rg_error_string",&_rg_err_str, SLANG_STRING_TYPE,1), + + /* From constants.c */ + MAKE_VARIABLE("FAIL", &_rg_fail, SLANG_INT_TYPE, 1), + MAKE_VARIABLE("SUCCESS", &_rg_success, SLANG_INT_TYPE, 1), + MAKE_VARIABLE("ERR_ABORT", &_rg_eabort, SLANG_INT_TYPE, 1), + MAKE_VARIABLE("ERR_INVALID", &_rg_einval, SLANG_INT_TYPE, 1), + MAKE_VARIABLE("ERR_DEPEND", &_rg_edepend, SLANG_INT_TYPE, 1), + MAKE_VARIABLE("ERR_DOMAIN", &_rg_edomain, SLANG_INT_TYPE, 1), + MAKE_VARIABLE("ERR_RUNNING", &_rg_erun, SLANG_INT_TYPE, 1), + + SLANG_END_INTRIN_VAR_TABLE +}; + + +#define rg_error(errortype) \ +do { \ + _rg_err = errortype; \ + _rg_err_str = ##errortype; \ +} while(0) + + +int +get_service_state_internal(char *svcName, rg_state_t *svcStatus) +{ + struct dlm_lksb lock; + char buf[32]; + + get_rg_state_local(svcName, svcStatus); + if (svcStatus->rs_state == RG_STATE_UNINITIALIZED) { + if (rg_lock(svcName, &lock) < 0) { + errno = ENOLCK; + return -1; + } + + if (get_rg_state(svcName, 
svcStatus) < 0) { + errno = ENOENT; + rg_unlock(&lock); + return -1; + } + + if (get_service_property(svcName, "autostart", + buf, sizeof(buf)) == 0) { + if (buf[0] == '0' || !strcasecmp(buf, "no")) { + svcStatus->rs_state = RG_STATE_DISABLED; + } else { + svcStatus->rs_state = RG_STATE_STOPPED; + } + } + + set_rg_state(svcName, svcStatus); + + rg_unlock(&lock); + } + + return 0; +} + + +/* + (restarts, last_owner, owner, state) = get_service_status(servicename) + */ +void +sl_service_status(char *svcName) +{ + rg_state_t svcStatus; + char *state_str; + + if (get_service_state_internal(svcName, &svcStatus) < 0) { + SLang_verror(SL_RunTime_Error, + "%s: Failed to get status for %s", + __FUNCTION__, + svcName); + return; + } + + if (SLang_push_integer(svcStatus.rs_restarts) < 0) { + SLang_verror(SL_RunTime_Error, + "%s: Failed to push restarts for %s", + __FUNCTION__, + svcName); + return; + } + + if (SLang_push_integer(svcStatus.rs_last_owner) < 0) { + SLang_verror(SL_RunTime_Error, + "%s: Failed to push last owner of %s", + __FUNCTION__, + svcName); + return; + } + + switch(svcStatus.rs_state) { + case RG_STATE_DISABLED: + case RG_STATE_STOPPED: + case RG_STATE_FAILED: + case RG_STATE_RECOVER: + case RG_STATE_ERROR: + /* There is no owner for these states. Ever. 
*/ + svcStatus.rs_owner = -1; + } + + if (SLang_push_integer(svcStatus.rs_owner) < 0) { + SLang_verror(SL_RunTime_Error, + "%s: Failed to push owner of %s", + __FUNCTION__, + svcName); + return; + } + + state_str = strdup(rg_state_str(svcStatus.rs_state)); + if (!state_str) { + SLang_verror(SL_RunTime_Error, + "%s: Failed to duplicate state of %s", + __FUNCTION__, + svcName); + return; + } + + if (SLang_push_malloced_string(state_str) < 0) { + SLang_verror(SL_RunTime_Error, + "%s: Failed to push state of %s", + __FUNCTION__, + svcName); + free(state_str); + } +} + + +/** + (nofailback, restricted, ordered, nodelist) = service_domain_info(svcName); + */ +void +sl_domain_info(char *svcName) +{ + int *nodelist = NULL, listlen; + char buf[64]; + int flags = 0; + + if (get_service_property(svcName, "domain", buf, sizeof(buf)) < 0) { + /* no nodes */ + SLang_push_integer(0); + + /* no domain? */ +/* + str = strdup("none"); + if (SLang_push_malloced_string(str) < 0) { + free(state_str); + return; + } +*/ + + /* not ordered */ + SLang_push_integer(0); + /* not restricted */ + SLang_push_integer(0); + /* nofailback not set */ + SLang_push_integer(0); + } + + if (node_domain_set_safe(buf, &nodelist, &listlen, &flags) < 0) { + SLang_push_integer(0); + SLang_push_integer(0); + SLang_push_integer(0); + SLang_push_integer(0); + return; + } + + SLang_push_integer(!!(flags & FOD_NOFAILBACK)); + SLang_push_integer(!!(flags & FOD_RESTRICTED)); + SLang_push_integer(!!(flags & FOD_ORDERED)); + + push_int_array(nodelist, listlen); + free(nodelist); + +/* + str = strdup(buf); + if (SLang_push_malloced_string(str) < 0) { + free(state_str); + return; + } +*/ +} + + +static int +get_int_array(int **nodelist, int *len) +{ + SLang_Array_Type *a = NULL; + SLindex_Type i; + int *nodes = NULL, t, ret = -1; + + if (!nodelist || !len) + return -1; + + t = SLang_peek_at_stack(); + if (t == SLANG_INT_TYPE) { + + nodes = malloc(sizeof(int) * 1); + if (!nodes) + goto out; + if 
(SLang_pop_integer(&nodes[0]) < 0) + goto out; + + *len = 1; + ret = 0; + + } else if (t == SLANG_ARRAY_TYPE) { + if (SLang_pop_array_of_type(&a, SLANG_INT_TYPE) < 0) + goto out; + if (a->num_dims > 1) + goto out; + if (a->dims[0] < 0) + goto out; + nodes = malloc(sizeof(int) * a->dims[0]); + if (!nodes) + goto out; + for (i = 0; i < a->dims[0]; i++) + SLang_get_array_element(a, &i, &nodes[i]); + + *len = a->dims[0]; + ret = 0; + } + +out: + if (a) + SLang_free_array(a); + if (ret == 0) { + *nodelist = nodes; + } else { + if (nodes) + free(nodes); + } + + return ret; +} + + +/** + get_service_property(service_name, property) + */ +char * +sl_service_property(char *svcName, char *prop) +{ + char buf[96]; + + if (get_service_property(svcName, prop, buf, sizeof(buf)) < 0) + return NULL; + + /* does this work or do I have to push a malloce'd string? */ + return strdup(buf); +} + + +/** + usage: + + stop_service(name, disable_flag); + */ +int +sl_stop_service(void) +{ + char *svcname = NULL; + int nargs, t, ret = -1; + int do_disable = 0; + + nargs = SLang_Num_Function_Args; + + /* Takes one or two args */ + if (nargs <= 0 || nargs > 2) { + SLang_verror(SL_Syntax_Error, + "%s: Wrong # of args (%d), must be 1 or 2\n", + __FUNCTION__, + nargs); + return -1; + } + + if (nargs == 2) { + t = SLang_peek_at_stack(); + if (t != SLANG_INT_TYPE) { + SLang_verror(SL_Syntax_Error, + "%s: expected type %d got %d\n", + __FUNCTION__, SLANG_INT_TYPE, t); + goto out; + } + + if (SLang_pop_integer(&do_disable) < 0) { + SLang_verror(SL_Syntax_Error, + "%s: Failed to pop integer from stack!\n", + __FUNCTION__); + goto out; + } + + --nargs; + } + + if (nargs == 1) { + t = SLang_peek_at_stack(); + if (t != SLANG_STRING_TYPE) { + SLang_verror(SL_Syntax_Error, + "%s: expected type %d got %d\n", + __FUNCTION__, + SLANG_STRING_TYPE, t); + goto out; + } + + if (SLpop_string(&svcname) < 0) { + SLang_verror(SL_Syntax_Error, + "%s: Failed to pop string from stack!\n", + __FUNCTION__); + goto out; + 
} + } + + /* TODO: Meat of function goes here */ + ret = service_op_stop(svcname, do_disable, _event_type); +out: + if (svcname) + free(svcname); + _user_return = ret; + return ret; +} + + +/** + usage: + + start_service(name, ordered_node_list_allowed, + node_list_illegal) + */ +int +sl_start_service(void) +{ + char *svcname = NULL; + int *pref_list = NULL, pref_list_len = 0; + int *illegal_list = NULL, illegal_list_len = 0; + int nargs, t, x, ret = -1; + + nargs = SLang_Num_Function_Args; + + /* Takes one, two, or three */ + if (nargs <= 0 || nargs > 3) { + SLang_verror(SL_Syntax_Error, + "%s: Wrong # of args (%d), must be 1 or 2\n", + __FUNCTION__, nargs); + return -1; + } + + if (nargs == 3) { + if (get_int_array(&illegal_list, &illegal_list_len) < 0) + goto out; + --nargs; + } + + if (nargs == 2) { + if (get_int_array(&pref_list, &pref_list_len) < 0) + goto out; + --nargs; + } + + if (nargs == 1) { + /* Just get the service name */ + t = SLang_peek_at_stack(); + if (t != SLANG_STRING_TYPE) { + SLang_verror(SL_Syntax_Error, + "%s: expected type %d got %d\n", + __FUNCTION__, + SLANG_STRING_TYPE, t); + goto out; + } + + if (SLpop_string(&svcname) < 0) + goto out; + } + + /* TODO: Meat of function goes here */ + ret = service_op_start(svcname, pref_list, + pref_list_len, &x); ; + +out: + if (svcname) + free(svcname); + if (illegal_list) + free(illegal_list); + if (pref_list) + free(pref_list); + _user_return = ret; + return ret; +} + + +/* Take an array of integers given its length and + push it on to the S/Lang stack */ +void +push_int_array(int *stuff, int len) +{ + SLindex_Type arrlen, x; + SLang_Array_Type *arr; + int i; + + arrlen = len; + arr = SLang_create_array(SLANG_INT_TYPE, 0, NULL, &arrlen, 1); + if (!arr) + return; + + x = 0; + for (x = 0; x < len; x++) { + i = stuff[x]; + SLang_set_array_element(arr, &x, &i); + } + SLang_push_array(arr, 1); +} + + +/* + Returns an array of rgmanager-visible nodes online. How cool is that? 
+ */ +void +sl_nodes_online(void) +{ + int i, *nodes, nodecount = 0; + + cluster_member_list_t *membership = member_list(); + if (!membership) + return; + nodes = malloc(sizeof(int) * membership->cml_count); + if (!nodes) + return; + + nodecount = 0; + for (i = 0; i < membership->cml_count; i++) { + if (membership->cml_members[i].cn_member && + membership->cml_members[i].cn_nodeid != 0) { + nodes[nodecount] = membership->cml_members[i].cn_nodeid; + ++nodecount; + } + } + free_member_list(membership); + push_int_array(nodes, nodecount); + free(nodes); +} + + +/* + Returns an array of rgmanager-defined services, in type:name format + We allocate/kill this list *once* per event to ensure we don't leak + memory + */ +void +sl_service_list(void) +{ + SLindex_Type svccount = _service_list_len, x = 0; + SLang_Array_Type *svcarray; + + svcarray = SLang_create_array(SLANG_STRING_TYPE, 0, NULL, &svccount, 1); + if (!svcarray) + return; + + for (; x < _service_list_len; x++) + SLang_set_array_element(svcarray, &x, &_service_list[x]); + + SLang_push_array(svcarray, 1); +} + + +/* s_union hook (see sets.c) */ +void +sl_union(void) +{ + int *arr1 = NULL, a1len = 0; + int *arr2 = NULL, a2len = 0; + int *ret = NULL, retlen = 0; + int nargs = SLang_Num_Function_Args; + + if (nargs != 2) + return; + + /* Remember: args on the stack are reversed */ + get_int_array(&arr2, &a2len); + get_int_array(&arr1, &a1len); + s_union(arr1, a1len, arr2, a2len, &ret, &retlen); + push_int_array(ret, retlen); + if (arr1) + free(arr1); + if (arr2) + free(arr2); + if (ret) + free(ret); + return; +} + + +/* s_intersection hook (see sets.c) */ +void +sl_intersection(void) +{ + int *arr1 = NULL, a1len = 0; + int *arr2 = NULL, a2len = 0; + int *ret = NULL, retlen = 0; + int nargs = SLang_Num_Function_Args; + + if (nargs != 2) + return; + + /* Remember: args on the stack are reversed */ + get_int_array(&arr2, &a2len); + get_int_array(&arr1, &a1len); + s_intersection(arr1, a1len, arr2, a2len, &ret, &retlen); 
/**
  Converts an int array to a string of the form "[ 1 2 3 ]" so it can be
  logged in one shot.

  @param buf		Destination buffer; always NUL-terminated on return
			when buflen > 0.
  @param buflen		Size of buf in bytes, including room for the NUL.
  @param array		Integers to format (may be NULL if arraylen is 0).
  @param arraylen	Number of integers in array.
  @return		Length of the resulting string (excluding the NUL),
			or -1 if the arguments are invalid or the text does
			not fit completely into buf.
 */
static int
array_to_string(char *buf, int buflen, int *array, int arraylen)
{
	int x, len, used;

	/* A non-positive size must never reach memset()/snprintf() */
	if (!buf || buflen <= 0 || (arraylen > 0 && !array))
		return -1;

	memset(buf, 0, buflen);

	len = snprintf(buf, buflen, "[ ");
	if (len < 0 || len >= buflen) {
		buf[0] = '\0';
		return -1;
	}
	used = len;

	/*
	 * Format each element directly behind what is already there.
	 * snprintf() returns the length it *wanted* to write, so
	 * "len >= space left" means the element (plus NUL) did not fit.
	 */
	for (x = 0; x < arraylen; x++) {
		len = snprintf(buf + used, buflen - used, "%d ", array[x]);
		if (len < 0 || len >= buflen - used) {
			buf[used] = '\0';	/* drop the partial element */
			return -1;
		}
		used += len;
	}

	len = snprintf(buf + used, buflen - used, "]");
	if (len < 0 || len >= buflen - used) {
		buf[used] = '\0';
		return -1;
	}
	used += len;

	return used;
}
len=snprintf(tmp, sizeof(tmp) - 1, + "{UnknownType %d}", t); + break; + } + + --nargs; + + if (len > remain) + return; + remain -= len; + + memcpy(&logbuf[remain], tmp, len); + } + +#if 0 + printf("<%d> %s\n", level, &logbuf[remain]); +#endif + clulog(level, &logbuf[remain]); + return; +} + + +/* Logging functions */ +void +sl_log_debug(void) +{ + sl_clulog(LOG_DEBUG); +} + + +void +sl_log_info(void) +{ + sl_clulog(LOG_INFO); +} + + +void +sl_log_notice(void) +{ + sl_clulog(LOG_NOTICE); +} + + +void +sl_log_warning(void) +{ + sl_clulog(LOG_WARNING); +} + + +void +sl_log_err(void) +{ + sl_clulog(LOG_ERR); +} + + +void +sl_log_crit(void) +{ + sl_clulog(LOG_CRIT); +} + + +void +sl_log_alert(void) +{ + sl_clulog(LOG_ALERT); +} + + +void +sl_log_emerg(void) +{ + sl_clulog(LOG_EMERG); +} + + +void +sl_die(void) +{ + _stop_processing = 1; + return; +} + + +SLang_Intrin_Fun_Type rgmanager_slang[] = +{ + MAKE_INTRINSIC_0("nodes_online", sl_nodes_online, SLANG_VOID_TYPE), + MAKE_INTRINSIC_0("service_list", sl_service_list, SLANG_VOID_TYPE), + + MAKE_INTRINSIC_SS("service_property", sl_service_property, + SLANG_STRING_TYPE), + MAKE_INTRINSIC_S("service_domain_info", sl_domain_info, SLANG_VOID_TYPE), + MAKE_INTRINSIC_0("service_stop", sl_stop_service, SLANG_INT_TYPE), + MAKE_INTRINSIC_0("service_start", sl_start_service, SLANG_INT_TYPE), + MAKE_INTRINSIC_S("service_status", sl_service_status, + SLANG_VOID_TYPE), + + /* Node list manipulation */ + MAKE_INTRINSIC_0("union", sl_union, SLANG_VOID_TYPE), + MAKE_INTRINSIC_0("intersection", sl_intersection, SLANG_VOID_TYPE), + MAKE_INTRINSIC_0("delta", sl_delta, SLANG_VOID_TYPE), + MAKE_INTRINSIC_0("subtract", sl_subtract, SLANG_VOID_TYPE), + MAKE_INTRINSIC_0("shuffle", sl_shuffle, SLANG_VOID_TYPE), + + /* Logging */ + MAKE_INTRINSIC_0("debug", sl_log_debug, SLANG_VOID_TYPE), + MAKE_INTRINSIC_0("info", sl_log_info, SLANG_VOID_TYPE), + MAKE_INTRINSIC_0("notice", sl_log_notice, SLANG_VOID_TYPE), + MAKE_INTRINSIC_0("warning", 
sl_log_warning, SLANG_VOID_TYPE), + MAKE_INTRINSIC_0("err", sl_log_err, SLANG_VOID_TYPE), + MAKE_INTRINSIC_0("crit", sl_log_crit, SLANG_VOID_TYPE), + MAKE_INTRINSIC_0("alert", sl_log_alert, SLANG_VOID_TYPE), + MAKE_INTRINSIC_0("emerg", sl_log_emerg, SLANG_VOID_TYPE), + + MAKE_INTRINSIC_0("stop_processing", sl_die, SLANG_VOID_TYPE), + + SLANG_END_INTRIN_FUN_TABLE +}; + + +/* Hook for when we generate a script error */ +void +rgmanager_slang_error_hook(char *errstr) +{ + /* Don't just send errstr, because it might contain + "%s" for example which would result in a crash! + plus, we like the newline :) */ + clulog(LOG_ERR, "[S/Lang] %s\n", errstr); +} + + + +/* ================================================================ + * S/Lang initialization + * ================================================================ */ +int +do_init_slang(void) +{ + SLang_init_slang(); + SLang_init_slfile(); + + if (SLadd_intrin_fun_table(rgmanager_slang, NULL) < 0) + return 1; + if (SLadd_intrin_var_table (rgmanager_vars, NULL) < 0) + return 1; + + /* TODO: Make rgmanager S/Lang conformant. Though, it + might be a poor idea to provide access to all the + S/Lang libs */ + SLpath_set_load_path(RESOURCE_ROOTDIR); + + _my_node_id = my_id(); + __sl_initialized = 1; + + SLang_Error_Hook = rgmanager_slang_error_hook; + + return 0; +} + + +/* + Execute a script / file and return the result to the caller + Log an error if we receive one. 
+ */ +int +do_slang_run(const char *file, const char *script) +{ + int ret = 0; + + if (file) + ret = SLang_load_file((char *)file); + else + ret = SLang_load_string((char *)script); + + if (ret < 0) { + clulog(LOG_ERR, "[S/Lang] Script Execution Failure\n"); + SLang_restart(1); + } + + return ret; +} + + +int +S_node_event(const char *file, const char *script, int nodeid, + int state, int clean) +{ + int ret; + cluster_member_list_t *membership = member_list(); + + _node_name = strdup(memb_id_to_name(membership, nodeid)); + _node_state = state; + _node_clean = clean; + _node_id = nodeid; + free_member_list(membership); + + ret = do_slang_run(file, script); + + _node_state = 0; + _node_clean = 0; + _node_id = 0; + if (_node_name) + free(_node_name); + _node_name = NULL; + + return ret; +} + + +int +S_service_event(const char *file, const char *script, char *name, + int state, int owner, int last_owner) +{ + int ret; + + _service_name = name; + _service_state = (char *)rg_state_str(state); + _service_owner = owner; + _service_last_owner = last_owner; + + switch(state) { + case RG_STATE_DISABLED: + case RG_STATE_STOPPED: + case RG_STATE_FAILED: + case RG_STATE_RECOVER: + case RG_STATE_ERROR: + /* There is no owner for these states. Ever. 
*/ + _service_owner = -1; + } + + ret = do_slang_run(file, script); + + _service_name = NULL; + _service_state = 0; + _service_owner = 0; + _service_last_owner = 0; + + return ret; +} + + +int +S_user_event(const char *file, const char *script, char *name, + int request, int arg1, int arg2, int target, msgctx_t *ctx) +{ + int ret = RG_SUCCESS; + + _service_name = name; + _service_owner = target; + _user_request = request; + _user_arg1 = arg1; + _user_arg2 = arg2; + _user_return = 0; + + ret = do_slang_run(file, script); + if (ret < 0) { + _user_return = RG_ESCRIPT; + } + + _service_name = NULL; + _service_owner = 0; + _user_request = 0; + _user_arg1 = 0; + _user_arg2 = 0; + + /* XXX Send response code to caller - that 0 should be the + new service owner, if there is one */ + if (ctx) { + send_ret(ctx, name, _user_return, request, 0); + msg_close(ctx); + msg_free_ctx(ctx); + } + _user_return = 0; + return ret; +} + + +int +slang_do_script(event_t *pattern, event_t *ev) +{ + _event_type = ev->ev_type; + int ret = 0; + + switch(ev->ev_type) { + case EVENT_NODE: + ret = S_node_event( + pattern->ev_script_file, + pattern->ev_script, + ev->ev.node.ne_nodeid, + ev->ev.node.ne_state, + ev->ev.node.ne_clean); + break; + case EVENT_RG: + ret = S_service_event( + pattern->ev_script_file, + pattern->ev_script, + ev->ev.group.rg_name, + ev->ev.group.rg_state, + ev->ev.group.rg_owner, + ev->ev.group.rg_last_owner); + break; + case EVENT_USER: + ret = S_user_event( + pattern->ev_script_file, + pattern->ev_script, + ev->ev.user.u_name, + ev->ev.user.u_request, + ev->ev.user.u_arg1, + ev->ev.user.u_arg2, + ev->ev.user.u_target, + ev->ev.user.u_ctx); + break; + default: + break; + } + + _event_type = EVENT_NONE; + return ret; +} + + + +/** + Process an event given our event table and the event that + occurred. Note that the caller is responsible for freeing the + event - do not free (ev) ... 
+ */ +int +slang_process_event(event_table_t *event_table, event_t *ev) +{ + int x, y; + event_t *pattern; + + if (!__sl_initialized) + do_init_slang(); + + /* Get the service list once before processing events */ + if (!_service_list || !_service_list_len) + _service_list = get_service_names(&_service_list_len); + + _stop_processing = 0; + for (x = 1; x <= event_table->max_prio; x++) { + list_for(&event_table->entries[x], pattern, y) { + if (event_match(pattern, ev)) + slang_do_script(pattern, ev); + if (_stop_processing) + goto out; + } + } + + /* Default level = 0 */ + list_for(&event_table->entries[0], pattern, y) { + if (event_match(pattern, ev)) + slang_do_script(pattern, ev); + if (_stop_processing) + goto out; + } + +out: + /* Free the service list */ + if (_service_list) { + for(x = 0; x < _service_list_len; x++) { + free(_service_list[x]); + } + free(_service_list); + _service_list = NULL; + _service_list_len = 0; + } + + return 0; +} --- cluster/rgmanager/src/daemons/Makefile 2007/11/30 20:36:17 1.24 +++ cluster/rgmanager/src/daemons/Makefile 2007/11/30 21:36:28 1.25 @@ -21,7 +21,6 @@ fo_domain.o \ groups.o \ main.o \ - nodeevent.o \ reslist.o \ resrules.o \ restree.o \ @@ -32,6 +31,9 @@ rg_state.o \ rg_thread.o \ restart_counter.o \ + service_op.o \ + slang_event.o \ + event_config.o \ watchdog.o OBJS2= clurmtabd.o \ @@ -47,7 +49,8 @@ reslist-noccs.o \ resrules-noccs.o \ restree-noccs.o \ - rg_locks-noccs.o + rg_locks-noccs.o \ + event_config-noccs.o CFLAGS += -D_GNU_SOURCE -DSHAREDIR=\"${sharedir}\" CFLAGS += -Werror -Wstrict-prototypes -Wshadow -fPIC @@ -65,7 +68,7 @@ CMAN_LDFLAGS += -L${cmanlibdir} -lcman DLM_LDFLAGS += -L${dlmlibdir} -ldlm XML2_LDFLAGS += `xml2-config --libs` -EXTRA_LDFLAGS += -lpthread -ldl +EXTRA_LDFLAGS += -lpthread -ldl -lslang LOCAL_LDFLAGS += -llalloc READLINE_LDFLAGS += -L${readlinelibdir} -lreadline --- cluster/rgmanager/src/daemons/fo_domain.c 2007/11/30 20:36:17 1.14 +++ cluster/rgmanager/src/daemons/fo_domain.c 2007/11/30 
21:36:28 1.15 @@ -34,6 +34,7 @@ #include #include #include +#include //#define DEBUG @@ -70,8 +71,7 @@ { fod_node_t *fodn; char xpath[256]; - char *ret, *nid; - int nodeid; + char *ret; snprintf(xpath, sizeof(xpath), "%s/failoverdomainnode[%d]/@name", base, idx); @@ -88,20 +88,6 @@ return NULL; } while (!list_done(&domain->fd_nodes, fodn)); - snprintf(xpath, sizeof(xpath), - "/cluster/clusternodes/clusternode[@name=\"%s\"]/@nodeid", - ret); - if ((ccs_get(ccsfd, xpath, &nid) == 0) && nid) { - nodeid = atoi(nid); - free(nid); - } else { - clulog(LOG_ERR, "#XX: Node %s in domain %s is not in " - "the configuration\n", ret, domain->fd_name); - /* No nodeid == bad failover domain */ - free(ret); - return NULL; - } - fodn = malloc(sizeof(*fodn)); if (!fodn) return NULL; @@ -110,8 +96,24 @@ /* Already malloc'd; simply store */ fodn->fdn_name = ret; fodn->fdn_prio = 0; - fodn->fdn_nodeid = nodeid; + snprintf(xpath, sizeof(xpath), + "/cluster/clusternodes/clusternode[@name=\"%s\"]/@nodeid", + ret); + if (ccs_get(ccsfd, xpath, &ret) != 0) { + clulog(LOG_WARNING, "Node %s has no nodeid attribute\n", + fodn->fdn_name); + fodn->fdn_nodeid = -1; + } else { + /* 64-bit-ism on rhel4? 
*/ + fodn->fdn_nodeid = atoi(ret); + } + + /* Don't even bother getting priority if we're not ordered (it's set + to 0 above */ + if (!(domain->fd_flags & FOD_ORDERED)) + return fodn; + snprintf(xpath, sizeof(xpath), "%s/failoverdomainnode[%d]/@priority", base, idx); if (ccs_get(ccsfd, xpath, &ret) != 0) @@ -270,6 +272,11 @@ { fod_t *fod; fod_node_t *fodn = NULL; + /* + int x; + int *node_set = NULL; + int node_set_len = 0; + */ list_do(domains, fod) { printf("Failover domain: %s\n", fod->fd_name); @@ -286,14 +293,12 @@ printf("\n"); } - list_do(&fod->fd_nodes, fodn) { - printf(" Node %s priority %d", - fodn->fdn_name, fodn->fdn_prio); - if (fodn->fdn_nodeid) { - printf(" nodeid %d", fodn->fdn_nodeid); - } - printf("\n"); - } while (!list_done(&fod->fd_nodes, fodn)); + list_do(&fod->fd_nodes, fodn) { + printf(" Node %s (id %d, priority %d)\n", + fodn->fdn_name, fodn->fdn_nodeid, + fodn->fdn_prio); + } while (!list_done(&fod->fd_nodes, fodn)); + } while (!list_done(domains, fod)); } @@ -359,6 +364,70 @@ } +int +node_domain_set(fod_t *domain, int **ret, int *retlen) +{ + int x, i, j; + int *tmpset; + int ts_count; + + fod_node_t *fodn; + + /* Count domain length */ + list_for(&domain->fd_nodes, fodn, x) { } + + *retlen = 0; + *ret = malloc(sizeof(int) * x); + if (!(*ret)) + return -1; + tmpset = malloc(sizeof(int) * x); + if (!(*tmpset)) + return -1; + + if (domain->fd_flags & FOD_ORDERED) { + for (i = 1; i <= 100; i++) { + + ts_count = 0; + list_for(&domain->fd_nodes, fodn, x) { + if (fodn->fdn_prio == i) { + s_add(tmpset, &ts_count, + fodn->fdn_nodeid); + } + } + + if (!ts_count) + continue; + + /* Shuffle stuff at this prio level */ + if (ts_count > 1) + s_shuffle(tmpset, ts_count); + for (j = 0; j < ts_count; j++) + s_add(*ret, retlen, tmpset[j]); + } + } + + /* Add unprioritized nodes */ + ts_count = 0; + list_for(&domain->fd_nodes, fodn, x) { + if (!fodn->fdn_prio) { + s_add(tmpset, &ts_count, + fodn->fdn_nodeid); + } + } + + if (!ts_count) + return 0; + + /* 
Shuffle stuff at this prio level */ + if (ts_count > 1) + s_shuffle(tmpset, ts_count); + for (j = 0; j < ts_count; j++) + s_add(*ret, retlen, tmpset[j]); + + return 0; +} + + /** * See if a given nodeid should start a specified service svcid. * --- cluster/rgmanager/src/daemons/groups.c 2007/11/30 20:36:17 1.40 +++ cluster/rgmanager/src/daemons/groups.c 2007/11/30 21:36:28 1.41 @@ -30,6 +30,7 @@ #include #include #include +#include /* Use address field in this because we never use it internally, and there is no extra space in the cman_node_t type. @@ -38,6 +39,8 @@ #define cn_svccount cn_address.cna_address[0] /* Theses are uint8_t size */ #define cn_svcexcl cn_address.cna_address[1] +extern event_table_t *master_event_table; + static int config_version = 0; static resource_t *_resources = NULL; static resource_rule_t *_rules = NULL; @@ -84,6 +87,32 @@ int +node_domain_set_safe(char *domainname, int **ret, int *retlen, int *flags) +{ + fod_t *fod; + int rv = -1, found = 0, x = 0; + + pthread_rwlock_rdlock(&resource_lock); + + list_for(&_domains, fod, x) { + if (!strcasecmp(fod->fd_name, domainname)) { + found = 1; + break; + } + } // while (!list_done(&_domains, fod)); + + if (found) { + rv = node_domain_set(fod, ret, retlen); + *flags = fod->fd_flags; + } + + pthread_rwlock_unlock(&resource_lock); + + return rv; +} + + +int count_resource_groups(cluster_member_list_t *ml) { resource_t *res; @@ -188,7 +217,7 @@ char rgname[64]; int x; - list_for(&_tree, node, x) { + list_for(tree, node, x) { res = node->rn_resource; res_build_name(rgname, sizeof(rgname), res); @@ -568,6 +597,60 @@ } +char ** +get_service_names(int *len) +{ + resource_node_t *node = NULL; + int nservices, ncopied = 0, x; + char **ret = NULL; + char rg_name[64]; + + pthread_rwlock_rdlock(&resource_lock); + + nservices = 0; + list_do(&_tree, node) { + ++nservices; + } while (!list_done(&_tree, node)); + + ret = malloc(sizeof(char *) * (nservices + 1)); + if (!ret) + goto out_fail; + + memset(ret, 0, 
sizeof(char *) * (nservices + 1)); + nservices = 0; + list_for(&_tree, node, nservices) { + res_build_name(rg_name, sizeof(rg_name), + node->rn_resource); + + if (!strlen(rg_name)) + continue; + + ret[ncopied] = strdup(rg_name); + if (ret[ncopied]) { + ncopied++; + } else { + goto out_fail; + } + } + + if (len) + *len = ncopied; + pthread_rwlock_unlock(&resource_lock); + return ret; + +out_fail: + pthread_rwlock_unlock(&resource_lock); + for (x = 0; x < ncopied; x++) + free(ret[x]); + if (ret) + free(ret); + return NULL; +} + + + + + /** * Called to decide what services to start locally during a node_event. * Originally a part of node_event, it is now its own function to cut down @@ -1043,6 +1126,48 @@ } +#if 0 +/** + Send the state of the transition master to a given file descriptor. + + @param fd File descriptor to send state to + @param rgname Resource group name whose state we want to send. + @see send_rg_states + */ +void +send_master_state(msgctx_t *ctx) +{ + rg_state_msg_t msg, *msgp = &msg; + event_master_t master; + rg_state_t *rs = &msg.rsm_state; + + strncpy(rs->rs_name, "internal:CentralProcessor", + sizeof(rs->rs_name)); + rs->rs_last_owner = 0; + rs->rs_restarts = 0; + + if (event_master_info_cached(&master) < 0) { + rs->rs_owner = 0; + rs->rs_transition = master.m_master_time; + rs->rs_state = RG_STATE_UNINITIALIZED; + } else { + rs->rs_owner = master.m_nodeid; + rs->rs_transition = master.m_master_time; + rs->rs_state = RG_STATE_STARTED; + } + + msgp->rsm_hdr.gh_magic = GENERIC_HDR_MAGIC; + msgp->rsm_hdr.gh_length = sizeof(msg); + msgp->rsm_hdr.gh_command = RG_STATUS; + + swab_rg_state_msg_t(msgp); + + if (msg_send(ctx, msgp, sizeof(msg)) < 0) + perror("msg_send"); +} +#endif + + /** Send status from a thread because we don't want rgmanager's main thread to block in the case of DLM issues @@ -1067,6 +1192,8 @@ pthread_exit(NULL); } + /*send_master_state(ctx);*/ + pthread_rwlock_rdlock(&resource_lock); list_do(&_tree, node) { @@ -1195,7 +1322,7 @@ 
Stop changed resources. */ void * -q_status_checks(void *arg) +q_status_checks(void __attribute__ ((unused)) *arg) { resource_node_t *curr; rg_state_t svcblk; @@ -1434,7 +1561,7 @@ int -check_config_update(void) +check_config_update(int *new, int *old) { int newver = 0, fd, ret = 0; char *val = NULL; @@ -1454,6 +1581,8 @@ pthread_mutex_lock(&config_mutex); if (newver && newver != config_version) ret = 1; + if (new) *new = newver; + if (old) *old = config_version; pthread_mutex_unlock(&config_mutex); ccs_unlock(fd); @@ -1477,12 +1606,14 @@ int init_resource_groups(int reconfigure) { - int fd, x; + int fd, x, y, cnt; + event_table_t *evt = NULL; resource_t *reslist = NULL, *res; resource_rule_t *rulelist = NULL, *rule; resource_node_t *tree = NULL; fod_t *domains = NULL, *fod; + event_t *evp; char *val; if (reconfigure) @@ -1543,6 +1674,24 @@ x = 0; list_do(&domains, fod) { ++x; } while (!list_done(&domains, fod)); clulog(LOG_DEBUG, "%d domains defined\n", x); + construct_events(fd, &evt); + cnt = 0; + if (evt) { + for (x=0; x <= evt->max_prio; x++) { + if (!evt->entries[x]) + continue; + + y = 0; + + list_do(&evt->entries[x], evp) { + ++y; + } while (!list_done(&evt->entries[x], evp)); + + cnt += y; + } + } + clulog(LOG_DEBUG, "%d events defined\n", x); + /* Reconfiguration done */ ccs_unlock(fd); @@ -1571,6 +1720,9 @@ if (_domains) deconstruct_domains(&_domains); _domains = domains; + if (master_event_table) + deconstruct_events(&master_event_table); + master_event_table = evt; pthread_rwlock_unlock(&resource_lock); if (reconfigure) { @@ -1612,6 +1764,60 @@ int +get_service_property(char *rg_name, char *prop, char *buf, size_t buflen) +{ + int ret = 0; + resource_t *res; + char *val; + + memset(buf, 0, buflen); + +#if 0 + if (!strcmp(prop, "domain")) { + /* not needed */ + strncpy(buf, "", buflen); + } else if (!strcmp(prop, "autostart")) { + strncpy(buf, "1", buflen); + } else if (!strcmp(prop, "hardrecovery")) { + strncpy(buf, "0", buflen); + } else if 
(!strcmp(prop, "exclusive")) { + strncpy(buf, "0", buflen); + } else if (!strcmp(prop, "nfslock")) { + strncpy(buf, "0", buflen); + } else if (!strcmp(prop, "recovery")) { + strncpy(buf, "restart", buflen); + } else if (!strcmp(prop, "depend")) { + /* not needed */ + strncpy(buf, "", buflen); + } else { + /* not found / no defaults */ + ret = -1; + } +#endif + + pthread_rwlock_rdlock(&resource_lock); + res = find_root_by_ref(&_resources, rg_name); + if (res) { + val = res_attr_value(res, prop); + if (val) { + ret = 0; + strncpy(buf, val, buflen); + } + } + pthread_rwlock_unlock(&resource_lock); + +#if 0 + if (ret == 0) + printf("%s(%s, %s) = %s\n", __FUNCTION__, rg_name, prop, buf); + else + printf("%s(%s, %s) = NOT FOUND\n", __FUNCTION__, rg_name, prop); +#endif + + return ret; +} + + +int check_restart(char *rg_name) { resource_node_t *node; --- cluster/rgmanager/src/daemons/main.c 2007/11/30 20:36:17 1.45 +++ cluster/rgmanager/src/daemons/main.c 2007/11/30 21:36:28 1.46 @@ -35,6 +35,7 @@ #include #include #include +#include #define L_SHUTDOWN (1<<2) #define L_SYS (1<<1) @@ -55,9 +56,10 @@ void flag_shutdown(int sig); void hard_exit(void); int send_rg_states(msgctx_t *, int); -int check_config_update(void); +int check_config_update(int *, int *); int svc_exists(char *); int watchdog_init(void); +int32_t master_event_callback(char *key, uint64_t viewno, void *data, uint32_t datalen); int shutdown_pending = 0, running = 1, need_reconfigure = 0; char debug = 0; /* XXX* */ @@ -66,11 +68,10 @@ static char *rgmanager_lsname = "rgmanager"; /* XXX default */ int next_node_id(cluster_member_list_t *membership, int me); -int rg_event_q(char *svcName, uint32_t state, int owner); void malloc_dump_table(FILE *, size_t, size_t); void -segfault(int sig) +segfault(int __attribute__ ((unused)) sig) { char ow[64]; @@ -95,13 +96,20 @@ send_node_states(msgctx_t *ctx) { int x; + event_master_t master; generic_msg_hdr hdr; cluster_member_list_t *ml = member_list(); + master.m_nodeid = 
0; + event_master_info_cached(&master); + for (x = 0; x < ml->cml_count; x++) { if (ml->cml_members[x].cn_member == 1) { msg_send_simple(ctx, RG_STATUS_NODE, - ml->cml_members[x].cn_nodeid, 0); + ml->cml_members[x].cn_nodeid, + (ml->cml_members[x].cn_nodeid && + (ml->cml_members[x].cn_nodeid == + (int)master.m_nodeid))); } } msg_send_simple(ctx, RG_SUCCESS, 0, 0); @@ -111,7 +119,7 @@ void -flag_reconfigure(int sig) +flag_reconfigure(int __attribute__ ((unused)) sig) { need_reconfigure++; } @@ -168,15 +176,25 @@ new_ml = get_member_list(h); memb_mark_down(new_ml, 0); - for (x = 0; x < new_ml->cml_count; x++) { + for(x=0; new_ml && xcml_count;x++) { + if (new_ml->cml_members[x].cn_nodeid == 0) { + new_ml->cml_members[x].cn_member = 0; + } + } + + for (x = 0; new_ml && x < new_ml->cml_count; x++) { - if (new_ml->cml_members[x].cn_member == 0) + if (new_ml->cml_members[x].cn_member == 0) { + printf("skipping %d - node not member\n", + new_ml->cml_members[x].cn_nodeid); continue; + } if (new_ml->cml_members[x].cn_nodeid == my_id()) continue; #ifdef DEBUG - printf("Checking for listening status of %d\n", new_ml->cml_members[x].cn_nodeid); + printf("Checking for listening status of %d\n", + new_ml->cml_members[x].cn_nodeid); #endif do { @@ -188,6 +206,7 @@ clulog(LOG_DEBUG, "Node %d is not listening\n", new_ml->cml_members[x].cn_nodeid); new_ml->cml_members[x].cn_member = 0; + break; } else if (quorate < 0) { if (errno == ENOTCONN) { new_ml->cml_members[x].cn_member = 0; @@ -277,7 +296,9 @@ int -lock_commit_cb(char *key, uint64_t viewno, void *data, uint32_t datalen) +lock_commit_cb(char __attribute__ ((unused)) *key, + uint64_t __attribute__ ((unused)) viewno, + void *data, uint32_t datalen) { char lockstate; @@ -404,7 +425,7 @@ int dispatch_msg(msgctx_t *ctx, int nodeid, int need_close) { - int ret = 0, sz = -1; + int ret = 0, sz = -1, nid; char msgbuf[4096]; generic_msg_hdr *msg_hdr = (generic_msg_hdr *)msgbuf; SmMessageSt *msg_sm = (SmMessageSt *)msgbuf; @@ -413,7 
+434,7 @@ /* Peek-a-boo */ sz = msg_receive(ctx, msg_hdr, sizeof(msgbuf), 1); - if (sz < sizeof (generic_msg_hdr)) { + if (sz < (int)sizeof (generic_msg_hdr)) { clulog(LOG_ERR, "#37: Error receiving header from %d sz=%d CTX %p\n", nodeid, sz, ctx); @@ -423,7 +444,7 @@ if (sz < 0) return -1; - if (sz > sizeof(msgbuf)) { + if (sz > (int)sizeof(msgbuf)) { raise(SIGSTOP); } @@ -442,7 +463,7 @@ goto out; } - if (msg_hdr->gh_length != sz) { + if ((int)msg_hdr->gh_length != sz) { clulog(LOG_ERR, "#XX: Read size mismatch: %d %d\n", ret, msg_hdr->gh_length); goto out; @@ -450,13 +471,13 @@ switch (msg_hdr->gh_command) { case RG_STATUS: - clulog(LOG_DEBUG, "Sending service states to CTX%p\n",ctx); + //clulog(LOG_DEBUG, "Sending service states to CTX%p\n",ctx); if (send_rg_states(ctx, msg_hdr->gh_arg1) == 0) need_close = 0; break; case RG_STATUS_NODE: - clulog(LOG_DEBUG, "Sending node states to CTX%p\n",ctx); + //clulog(LOG_DEBUG, "Sending node states to CTX%p\n",ctx); send_node_states(ctx); break; @@ -475,7 +496,7 @@ case RG_ACTION_REQUEST: - if (sz < sizeof(msg_sm)) { + if (sz < (int)sizeof(msg_sm)) { clulog(LOG_ERR, "#39: Error receiving entire request (%d/%d)\n", ret, (int)sizeof(msg_sm)); @@ -494,14 +515,37 @@ swab_SmMessageSt(msg_sm); if (msg_send(ctx, msg_sm, sizeof (SmMessageSt)) < - sizeof (SmMessageSt)) + (int)sizeof (SmMessageSt)) clulog(LOG_ERR, "#40: Error replying to " "action request.\n"); ret = -1; goto out; } - /* Queue request */ + if (central_events_enabled() && + msg_sm->sm_hdr.gh_arg1 != RG_ACTION_MASTER) { + + /* Centralized processing or request is from + clusvcadm */ + nid = event_master(); + if (nid != my_id()) { + /* Forward the message to the event master */ + forward_message(ctx, msg_sm, nid); + } else { + /* for us: queue it */ + user_event_q(msg_sm->sm_data.d_svcName, + msg_sm->sm_data.d_action, + msg_sm->sm_hdr.gh_arg1, + msg_sm->sm_hdr.gh_arg2, + msg_sm->sm_data.d_svcOwner, + ctx); + } + + return 0; + } + + /* Distributed processing and/or 
request is from master node + -- Queue request */ rt_enqueue_request(msg_sm->sm_data.d_svcName, msg_sm->sm_data.d_action, ctx, 0, msg_sm->sm_data.d_svcOwner, @@ -511,7 +555,7 @@ case RG_EVENT: /* Service event. Run a dependency check */ - if (sz < sizeof(msg_sm)) { + if (sz < (int)sizeof(msg_sm)) { clulog(LOG_ERR, "#39: Error receiving entire request (%d/%d)\n", ret, (int)sizeof(msg_sm)); @@ -527,7 +571,8 @@ /* Send to our rg event handler */ rg_event_q(msg_sm->sm_data.d_svcName, msg_sm->sm_data.d_action, - msg_sm->sm_data.d_svcOwner); + msg_sm->sm_hdr.gh_arg1, + msg_sm->sm_hdr.gh_arg2); break; case RG_EXITING: @@ -665,7 +710,7 @@ int event_loop(msgctx_t *localctx, msgctx_t *clusterctx) { - int n = 0, max, ret; + int n = 0, max, ret, oldver, newver; fd_set rfds; msgctx_t *newctx; struct timeval tv; @@ -734,10 +779,10 @@ if (!running) return 0; - if (need_reconfigure || check_config_update()) { + if (need_reconfigure || check_config_update(&oldver, &newver)) { need_reconfigure = 0; configure_rgmanager(-1, 0); - init_resource_groups(1); + config_event_q(oldver, newver); return 0; } @@ -756,7 +801,7 @@ void -flag_shutdown(int sig) +flag_shutdown(int __attribute__ ((unused)) sig) { shutdown_pending = 1; } @@ -782,7 +827,7 @@ void -statedump(int sig) +statedump(int __attribute__ ((unused)) sig) { signalled++; } @@ -819,8 +864,15 @@ } if (ccs_get(ccsfd, "/cluster/rm/@transition_throttling", &v) == 0) { - if (!dbg) - set_transition_throttling(atoi(v)); + set_transition_throttling(atoi(v)); + free(v); + } + + if (ccs_get(ccsfd, "/cluster/rm/@central_processing", &v) == 0) { + set_central_events(atoi(v)); + if (atoi(v)) + clulog(LOG_NOTICE, + "Centralized Event Processing enabled\n"); free(v); } @@ -874,7 +926,7 @@ void * -shutdown_thread(void *arg) +shutdown_thread(void __attribute__ ((unused)) *arg) { rg_lockall(L_SYS|L_SHUTDOWN); rg_doall(RG_STOP_EXITING, 1, NULL); @@ -1014,6 +1066,7 @@ } vf_key_init("rg_lockdown", 10, NULL, lock_commit_cb); + 
vf_key_init("Transition-Master", 10, NULL, master_event_callback); #endif /* --- cluster/rgmanager/src/daemons/restree.c 2007/11/30 21:01:27 1.39 +++ cluster/rgmanager/src/daemons/restree.c 2007/11/30 21:36:28 1.40 @@ -1063,7 +1063,8 @@ /* Ok, it's a 'status' action. See if enough time has elapsed for a given type of status action */ - if (delta < node->rn_actions[x].ra_interval) + if (delta < node->rn_actions[x].ra_interval || + !node->rn_actions[x].ra_interval) continue; if (idx == -1 || --- cluster/rgmanager/src/daemons/rg_event.c 2007/07/23 20:49:13 1.2 +++ cluster/rgmanager/src/daemons/rg_event.c 2007/11/30 21:36:28 1.3 @@ -1,10 +1,9 @@ /* - Copyright Red Hat, Inc. 2006 + Copyright Red Hat, Inc. 2006-2007 This program is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 2, or (at your option) any - later version. + under the terms of the GNU General Public License version 2 as published + by the Free Software Foundation. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -23,81 +22,543 @@ #include #include #include - -typedef struct __rge_q { - list_head(); - char rg_name[128]; - uint32_t rg_state; - int rg_owner; -} rgevent_t; +#include +#include +#include +#include +#include /** * resource group event queue. 
*/ -static rgevent_t *rg_ev_queue = NULL; -static pthread_mutex_t rg_queue_mutex = PTHREAD_MUTEX_INITIALIZER; -static pthread_t rg_ev_thread = 0; +static event_t *event_queue = NULL; +#ifdef WRAP_LOCKS +static pthread_mutex_t event_queue_mutex = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP; +static pthread_mutex_t mi_mutex = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP; +#else +static pthread_mutex_t event_queue_mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_mutex_t mi_mutex = PTHREAD_MUTEX_INITIALIZER; +#endif +static pthread_t event_thread = 0; +static int transition_throttling = 5; +static int central_events = 0; + +extern int running; +extern int shutdown_pending; +static int _master = 0; +static struct dlm_lksb _master_lock; +static int _xid = 0; +static event_master_t *mi = NULL; + +void hard_exit(void); +int init_resource_groups(int); +void flag_shutdown(int sig); +void flag_reconfigure(int sig); -void group_event(char *name, uint32_t state, int owner); +event_table_t *master_event_table = NULL; + + +void +set_transition_throttling(int nsecs) +{ + if (nsecs < 0) + nsecs = 0; + transition_throttling = nsecs; +} + + +void +set_central_events(int flag) +{ + central_events = flag; +} + + +int +central_events_enabled(void) +{ + return central_events; +} + + +/** + Called to handle the transition of a cluster member from up->down or + down->up. This handles initializing services (in the local node-up case), + exiting due to loss of quorum (local node-down), and service fail-over + (remote node down). This is the distributed node event processor; + for the local-only node event processor, see slang_event.c + + @param nodeID ID of the member which has come up/gone down. + @param nodeStatus New state of the member in question. 
+ @see eval_groups + */ +void +node_event(int local, int nodeID, int nodeStatus, int clean) +{ + if (!running) + return; + + if (local) { + + /* Local Node Event */ + if (nodeStatus == 0) { + clulog(LOG_ERR, "Exiting uncleanly\n"); + hard_exit(); + } + + if (!rg_initialized()) { + if (init_resource_groups(0) != 0) { + clulog(LOG_ERR, + "#36: Cannot initialize services\n"); + hard_exit(); + } + } + + if (shutdown_pending) { + clulog(LOG_NOTICE, "Processing delayed exit signal\n"); + running = 0; + return; + } + setup_signal(SIGINT, flag_shutdown); + setup_signal(SIGTERM, flag_shutdown); + setup_signal(SIGHUP, flag_reconfigure); + + eval_groups(1, nodeID, 1); + return; + } + + /* + * Nothing to do for events from other nodes if we are not ready. + */ + if (!rg_initialized()) { + clulog(LOG_DEBUG, "Services not initialized.\n"); + return; + } + + eval_groups(0, nodeID, nodeStatus); +} + + +/** + Query CCS to see whether a node has fencing enabled or not in + the configuration. This does not check to see if it's in the + fence domain. + */ +int +node_has_fencing(int nodeid) +{ + int ccs_desc; + char *val = NULL; + char buf[1024]; + int ret = 1; + + ccs_desc = ccs_connect(); + if (ccs_desc < 0) { + clulog(LOG_ERR, "Unable to connect to ccsd; cannot handle" + " node event!\n"); + /* Assume node has fencing */ + return 1; + } + + snprintf(buf, sizeof(buf), + "/cluster/clusternodes/clusternode[@nodeid=\"%d\"]" + "/fence/method/device/@name", nodeid); + + if (ccs_get(ccs_desc, buf, &val) != 0) + ret = 0; + if (val) + free(val); + ccs_disconnect(ccs_desc); + return ret; +} + + +/** + Quick query to cman to see if a node has been fenced. 
+ */ +int +node_fenced(int nodeid) +{ + cman_handle_t ch; + int fenced = 0; + uint64_t fence_time; + + ch = cman_init(NULL); + if (cman_get_fenceinfo(ch, nodeid, &fence_time, &fenced, NULL) < 0) + fenced = 0; + + cman_finish(ch); + + return fenced; +} + + +/** + Callback from view-formation when a commit occurs for the Transition- + Master key. Returns 0 when the new master record is accepted and 1 when it is rejected (bad length or bad magic). + */ +int32_t +master_event_callback(char *key, uint64_t viewno, + void *data, uint32_t datalen) +{ + event_master_t *m; + + m = data; + if (datalen != (uint32_t)sizeof(*m)) { + clulog(LOG_ERR, "%s: wrong size\n", __FUNCTION__); + return 1; + } + + swab_event_master_t(m); + if (m->m_magic != EVENT_MASTER_MAGIC) { + clulog(LOG_ERR, "%s: wrong magic\n", __FUNCTION__); + return 1; + } + + if (m->m_nodeid == my_id()) + clulog(LOG_DEBUG, "Master Commit: I am master\n"); + else + clulog(LOG_DEBUG, "Master Commit: %d is master\n", m->m_nodeid); + + pthread_mutex_lock(&mi_mutex); + if (mi) + free(mi); + mi = m; + pthread_mutex_unlock(&mi_mutex); + + return 0; +} + + +/** + Read the Transition-Master key from vf if it exists. If it doesn't, + attempt to become the transition-master. 
+ */ +static int +find_master(void) +{ + event_master_t *masterinfo = NULL; + void *data; + uint32_t sz; + cluster_member_list_t *m; + uint64_t vn; + int master_id = -1; + + m = member_list(); + if (vf_read(m, "Transition-Master", &vn, + (void **)(&data), &sz) < 0) { + clulog(LOG_ERR, "Unable to discover master" + " status\n"); + masterinfo = NULL; + } else { + masterinfo = (event_master_t *)data; + } + free_member_list(m); + + if (masterinfo && (sz >= sizeof(*masterinfo))) { + swab_event_master_t(masterinfo); + if (masterinfo->m_magic == EVENT_MASTER_MAGIC) { + clulog(LOG_DEBUG, "Master Locate: %d is master\n", + masterinfo->m_nodeid); + pthread_mutex_lock(&mi_mutex); + if (mi) + free(mi); + mi = masterinfo; + pthread_mutex_unlock(&mi_mutex); + master_id = masterinfo->m_nodeid; + } + } + + return master_id; +} + + +/** + Return a copy of the cached event_master_t structure to the + caller. Returns 0 on success; otherwise -1 with errno set to EINVAL (central events disabled or NULL output pointer) or ENOENT (no master cached yet). + */ +int +event_master_info_cached(event_master_t *mi_out) +{ + if (!central_events || !mi_out) { + errno = EINVAL; + return -1; + } + + pthread_mutex_lock(&mi_mutex); + if (!mi) { + pthread_mutex_unlock(&mi_mutex); + errno = ENOENT; + return -1; + } + + memcpy(mi_out, mi, sizeof(*mi)); + pthread_mutex_unlock(&mi_mutex); + return 0; +} + + +/** + Return the node ID of the master. If none exists, become + the master and return our own node ID. + */ +int +event_master(void) +{ + cluster_member_list_t *m = NULL; + event_master_t masterinfo; + int master_id = -1; + + /* We hold this forever. 
*/ + if (_master) + return my_id(); + + pthread_mutex_lock(&mi_mutex); + if (mi) { + master_id = mi->m_nodeid; + pthread_mutex_unlock(&mi_mutex); + //clulog(LOG_DEBUG, "%d is master\n", mi->m_nodeid); + return master_id; + } + pthread_mutex_unlock(&mi_mutex); + + memset(&_master_lock, 0, sizeof(_master_lock)); + if (clu_lock(LKM_EXMODE, &_master_lock, LKF_NOQUEUE, + "Transition-Master") < 0) { + /* not us, find out who is master */ + return find_master(); + } + + if (_master_lock.sb_status != 0) + return -1; + _master = 1; + m = member_list(); + memset(&masterinfo, 0, sizeof(masterinfo)); + masterinfo.m_magic = EVENT_MASTER_MAGIC; + masterinfo.m_nodeid = my_id(); + masterinfo.m_master_time = (uint64_t)time(NULL); + swab_event_master_t(&masterinfo); + + if (vf_write(m, VFF_IGN_CONN_ERRORS | VFF_RETRY, + "Transition-Master", &masterinfo, + sizeof(masterinfo)) < 0) { + clulog(LOG_ERR, "Unable to advertise master" + " status to all nodes\n"); + } + free_member_list(m); + + return my_id(); +} + + + +void group_event(char *name, uint32_t state, int owner); + +/** + Event handling function. This only stays around as long as + events are on the queue. + */ void * -rg_event_thread(void *arg) +_event_thread_f(void *arg) { - rgevent_t *ev; + event_t *ev; + int notice = 0, count = 0; while (1) { - pthread_mutex_lock(&rg_queue_mutex); - ev = rg_ev_queue; + pthread_mutex_lock(&event_queue_mutex); + ev = event_queue; if (ev) - list_remove(&rg_ev_queue, ev); + list_remove(&event_queue, ev); else break; /* We're outta here */ - pthread_mutex_unlock(&rg_queue_mutex); - group_event(ev->rg_name, ev->rg_state, ev->rg_owner); + ++count; + pthread_mutex_unlock(&event_queue_mutex); + + /* Event thread usually doesn't hang around. 
When it's + spawned, sleep for this many seconds in order to let + some events queue up; the queue lock is dropped first so producers in insert_event() are not blocked */ + if ((count==1) && transition_throttling && !central_events) + sleep(transition_throttling); + + if (ev->ev_type == EVENT_CONFIG) { + /* + clulog(LOG_NOTICE, "Config Event: %d -> %d\n", + ev->ev.config.cfg_oldversion, + ev->ev.config.cfg_version); + */ + init_resource_groups(1); + free(ev); + continue; + } + + if (central_events) { + /* If the master node died or there isn't + one yet, take the master lock. */ + if (event_master() == my_id()) { + slang_process_event(master_event_table, + ev); + } + free(ev); + continue; + /* ALL OF THE CODE BELOW IS DISABLED + when using central_events */ + } + + if (ev->ev_type == EVENT_RG) { + /* + clulog(LOG_NOTICE, "RG Event: %s %s %d\n", + ev->ev.group.rg_name, + rg_state_str(ev->ev.group.rg_state), + ev->ev.group.rg_owner); + */ + group_event(ev->ev.group.rg_name, + ev->ev.group.rg_state, + ev->ev.group.rg_owner); + } else if (ev->ev_type == EVENT_NODE) { + /* + clulog(LOG_NOTICE, "Node Event: %s %d %s %s\n", + ev->ev.node.ne_local?"Local":"Remote", + ev->ev.node.ne_nodeid, + ev->ev.node.ne_state?"UP":"DOWN", + ev->ev.node.ne_clean?"Clean":"Dirty") + */ + + if (ev->ev.node.ne_state == 0 && + !ev->ev.node.ne_clean && + node_has_fencing(ev->ev.node.ne_nodeid)) { + notice = 0; + while (!node_fenced(ev->ev.node.ne_nodeid)) { + if (!notice) { + notice = 1; + clulog(LOG_INFO, "Waiting for " + "node #%d to be fenced\n", + ev->ev.node.ne_nodeid); + } + sleep(2); + } + + if (notice) + clulog(LOG_INFO, "Node #%d fenced; " + "continuing\n", + ev->ev.node.ne_nodeid); + } + + node_event(ev->ev.node.ne_local, + ev->ev.node.ne_nodeid, + ev->ev.node.ne_state, + ev->ev.node.ne_clean); + } free(ev); } + if (!central_events || _master) { + clulog(LOG_DEBUG, "%d events processed\n", count); + } /* Mutex held */ - rg_ev_thread = 0; - pthread_mutex_unlock(&rg_queue_mutex); + event_thread = 0; + 
pthread_mutex_unlock(&event_queue_mutex); pthread_exit(NULL); } -void -rg_event_q(char *name, uint32_t state, int owner) +static void +insert_event(event_t *ev) { - rgevent_t *ev; pthread_attr_t attrs; + pthread_mutex_lock (&event_queue_mutex); + ev->ev_transaction = ++_xid; + list_insert(&event_queue, ev); + if (event_thread == 0) { + pthread_attr_init(&attrs); + pthread_attr_setinheritsched(&attrs, PTHREAD_INHERIT_SCHED); + pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); + pthread_attr_setstacksize(&attrs, 262144); + + pthread_create(&event_thread, &attrs, _event_thread_f, NULL); + pthread_attr_destroy(&attrs); + } + pthread_mutex_unlock (&event_queue_mutex); +} + + +static event_t * +new_event(void) +{ + event_t *ev; while (1) { - ev = malloc(sizeof(rgevent_t)); + ev = malloc(sizeof(*ev)); if (ev) { break; } sleep(1); } - memset(ev,0,sizeof(*ev)); + ev->ev_type = EVENT_NONE; - strncpy(ev->rg_name, name, 128); - ev->rg_state = state; - ev->rg_owner = owner; - - pthread_mutex_lock (&rg_queue_mutex); - list_insert(&rg_ev_queue, ev); - if (rg_ev_thread == 0) { - pthread_attr_init(&attrs); - pthread_attr_setinheritsched(&attrs, PTHREAD_INHERIT_SCHED); - pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); - pthread_attr_setstacksize(&attrs, 262144); + return ev; +} - pthread_create(&rg_ev_thread, &attrs, rg_event_thread, NULL); - pthread_attr_destroy(&attrs); - } - pthread_mutex_unlock (&rg_queue_mutex); + +void +rg_event_q(char *name, uint32_t state, int owner, int last) +{ + event_t *ev = new_event(); + + ev->ev_type = EVENT_RG; + + strncpy(ev->ev.group.rg_name, name, 128); + ev->ev.group.rg_state = state; + ev->ev.group.rg_owner = owner; + ev->ev.group.rg_last_owner = last; + + insert_event(ev); +} + + +void +node_event_q(int local, int nodeID, int state, int clean) +{ + event_t *ev = new_event(); + + ev->ev_type = EVENT_NODE; + ev->ev.node.ne_state = state; + ev->ev.node.ne_local = local; + ev->ev.node.ne_nodeid = nodeID; + 
ev->ev.node.ne_clean = clean; + insert_event(ev); } + + +void +config_event_q(int old_version, int new_version) +{ + event_t *ev = new_event(); + + ev->ev_type = EVENT_CONFIG; + ev->ev.config.cfg_version = new_version; + ev->ev.config.cfg_oldversion = old_version; + insert_event(ev); +} + +void +user_event_q(char *svc, int request, + int arg1, int arg2, int target, msgctx_t *ctx) +{ + event_t *ev = new_event(); + + ev->ev_type = EVENT_USER; + strncpy(ev->ev.user.u_name, svc, sizeof(ev->ev.user.u_name)); + ev->ev.user.u_request = request; + ev->ev.user.u_arg1 = arg1; + ev->ev.user.u_arg2 = arg2; + ev->ev.user.u_target = target; + ev->ev.user.u_ctx = ctx; + insert_event(ev); +} + --- cluster/rgmanager/src/daemons/rg_forward.c 2007/08/02 14:53:38 1.11 +++ cluster/rgmanager/src/daemons/rg_forward.c 2007/11/30 21:36:28 1.12 @@ -27,11 +27,21 @@ #include +struct fw_message { + msgctx_t *ctx; + SmMessageSt msg; + int nodeid; +}; + + void -build_message(SmMessageSt *msgp, int action, char *svcName, int target) +build_message(SmMessageSt *msgp, int action, char *svcName, int target, + int arg1, int arg2) { msgp->sm_hdr.gh_magic = GENERIC_HDR_MAGIC; msgp->sm_hdr.gh_command = RG_ACTION_REQUEST; + msgp->sm_hdr.gh_arg1 = arg1; + msgp->sm_hdr.gh_arg2 = arg2; msgp->sm_hdr.gh_length = sizeof(*msgp); msgp->sm_data.d_action = action; strncpy(msgp->sm_data.d_svcName, svcName, @@ -90,7 +100,8 @@ } /* Construct message */ - build_message(&msg, req->rr_request, req->rr_group, req->rr_target); + build_message(&msg, req->rr_request, req->rr_group, req->rr_target, + req->rr_arg0, req->rr_arg1); if (msg_open(MSG_CLUSTER, rgs.rs_owner, RG_PORT, ctx, 10) < 0) { clulog(LOG_DEBUG, "FW: Failed to open channel to %d CTX: %p\n", @@ -166,3 +177,120 @@ pthread_attr_destroy(&attrs); } + + +void * +forwarding_thread_v2(void *arg) +{ + msgctx_t *ctx = NULL, *resp_ctx = NULL; + cluster_member_list_t *m = NULL; + SmMessageSt *msgp = NULL, msg; + int response_code = RG_EAGAIN, ret, target = -1, new_owner = 
0; + int retries = 0; + struct fw_message *fwmsg = (struct fw_message *)arg; + + msgp = &fwmsg->msg; + resp_ctx = fwmsg->ctx; + target = fwmsg->nodeid; + + clulog(LOG_DEBUG, "FW: Forwarding SM request to %d\n", + target); + + ctx = msg_new_ctx(); + if (ctx == NULL) { + clulog(LOG_DEBUG, "FW: Failed to allocate socket context: %s\n", + strerror(errno)); + goto out_fail; + } + if (msg_open(MSG_CLUSTER, target, RG_PORT, ctx, 10) < 0) { + clulog(LOG_DEBUG, "FW: Failed to open channel to %d CTX: %p\n", + target, ctx); + goto out_fail; + } + + /* swap + send */ + swab_SmMessageSt(msgp); + if (msg_send(ctx, msgp, sizeof(*msgp)) < (int)sizeof(*msgp)) { + clulog(LOG_DEBUG, "FW: Failed to send message to %d CTX: %p\n", + target, ctx); + goto out_fail; + } + + + /* + * Ok, we're forwarding a message to another node. Keep tabs on + * the node to make sure it doesn't die. Basically, wake up every + * now and again to make sure it's still online. If it isn't, send + * a response back to the caller. + */ + do { + ret = msg_receive(ctx, &msg, sizeof(msg), 10); + if (ret < (int)sizeof(msg)) { + if (ret < 0 && errno == ETIMEDOUT) { + m = member_list(); + if (!memb_online(m, target)) { + response_code = RG_ENODE; + goto out_fail; + } + free_member_list(m); + m = NULL; + continue; + } + + if (ret == 0) + continue; + } + break; + } while(++retries < 60); /* old 600 second rule */ + + swab_SmMessageSt(&msg); + + response_code = msg.sm_data.d_ret; + +out_fail: + if (resp_ctx) { + send_ret(resp_ctx, msgp->sm_data.d_svcName, response_code, + msgp->sm_data.d_action, new_owner); + msg_close(resp_ctx); + msg_free_ctx(resp_ctx); + } + + free(fwmsg); /* msgp points into fwmsg: release it only after the reply above has been built */ + + if (ctx) { + msg_close(ctx); + msg_free_ctx(ctx); + } + if (m) + free_member_list(m); + + pthread_exit(NULL); +} + + +void +forward_message(msgctx_t *ctx, void *msgp, int nodeid) +{ + pthread_t newthread; + pthread_attr_t attrs; + struct fw_message *fwmsg; + + fwmsg 
= malloc(sizeof(struct fw_message)); + if (!fwmsg) { + msg_close(ctx); + 
msg_free_ctx(ctx); + return; + } + + memcpy(&fwmsg->msg, msgp, sizeof(fwmsg->msg)); + fwmsg->ctx = ctx; + fwmsg->nodeid = nodeid; + + pthread_attr_init(&attrs); + pthread_attr_setinheritsched(&attrs, PTHREAD_INHERIT_SCHED); + pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); + pthread_attr_setstacksize(&attrs, 262144); + + pthread_create(&newthread, &attrs, forwarding_thread_v2, fwmsg); + pthread_attr_destroy(&attrs); +} --- cluster/rgmanager/src/daemons/rg_state.c 2007/11/30 20:36:18 1.41 +++ cluster/rgmanager/src/daemons/rg_state.c 2007/11/30 21:36:28 1.42 @@ -36,6 +36,7 @@ #include #include #include +#include /* XXX - copied :( */ #define cn_svccount cn_address.cna_address[0] /* Theses are uint8_t size */ @@ -86,8 +87,24 @@ } +char * +c_name(char *svcName) +{ + char *ptr, *ret = svcName; + + ptr = strchr(svcName,':'); + if (!ptr) + return ret; + if ((int)(ptr - svcName) == 7 && + !memcmp(svcName, "service", 7)) /* strlen("service") */ + ret = ptr + 1; + + return ret; +} + + void -broadcast_event(char *svcName, uint32_t state) +broadcast_event(char *svcName, uint32_t state, int owner, int last) { SmMessageSt msgp; msgctx_t everyone; @@ -95,10 +112,12 @@ msgp.sm_hdr.gh_magic = GENERIC_HDR_MAGIC; msgp.sm_hdr.gh_command = RG_EVENT; msgp.sm_hdr.gh_length = sizeof(msgp); + msgp.sm_hdr.gh_arg1 = owner; + msgp.sm_hdr.gh_arg2 = last; msgp.sm_data.d_action = state; strncpy(msgp.sm_data.d_svcName, svcName, sizeof(msgp.sm_data.d_svcName)); - msgp.sm_data.d_svcOwner = 0; + msgp.sm_data.d_svcOwner = owner; msgp.sm_data.d_ret = 0; swab_SmMessageSt(&msgp); @@ -201,7 +220,7 @@ void -send_ret(msgctx_t *ctx, char *name, int ret, int orig_request) +send_ret(msgctx_t *ctx, char *name, int ret, int orig_request, int new_owner) { SmMessageSt msg, *msgp = &msg; if (!ctx) @@ -213,7 +232,9 @@ msgp->sm_data.d_action = orig_request; strncpy(msgp->sm_data.d_svcName, name, sizeof(msgp->sm_data.d_svcName)); - msgp->sm_data.d_svcOwner = my_id(); /* XXX Broken */ + if (!new_owner) + 
new_owner = my_id(); + msgp->sm_data.d_svcOwner = new_owner; /* XXX Broken */ msgp->sm_data.d_ret = ret; swab_SmMessageSt(msgp); @@ -344,6 +365,7 @@ return 0; #else membership = member_list(); + ret = vf_read(membership, res, &viewno, &data, &datalen); if (ret != VFR_OK || datalen == 0) { @@ -666,7 +688,7 @@ /* * Starting failed service... */ - if (req == RG_START_RECOVER) { + if (req == RG_START_RECOVER || central_events_enabled()) { clulog(LOG_NOTICE, "Recovering failed service %s\n", svcName); @@ -698,7 +720,7 @@ case RG_STATE_DISABLED: case RG_STATE_UNINITIALIZED: - if (req == RG_ENABLE) { + if (req == RG_ENABLE || req == RG_START_REMOTE) { /* Don't actually enable if the RG is locked! */ if (rg_locked()) { ret = 3; @@ -825,7 +847,8 @@ "Service %s started\n", svcName); - broadcast_event(svcName, RG_STATE_STARTED); + broadcast_event(svcName, RG_STATE_STARTED, svcStatus.rs_owner, + svcStatus.rs_last_owner); } else { clulog(LOG_WARNING, "#68: Failed to start %s; return value: %d\n", @@ -1299,8 +1322,8 @@ clulog(LOG_NOTICE, "Stopping service %s\n", svcName); - if (recover) - svcStatus.rs_state = RG_STATE_ERROR; + if (recover) + svcStatus.rs_state = RG_STATE_ERROR; else svcStatus.rs_state = RG_STATE_STOPPING; svcStatus.rs_transition = (uint64_t)time(NULL); @@ -1382,7 +1405,7 @@ } rg_unlock(&lockp); - broadcast_event(svcName, newstate); + broadcast_event(svcName, newstate, -1, svcStatus.rs_last_owner); return 0; } @@ -1463,7 +1486,8 @@ } rg_unlock(&lockp); - broadcast_event(svcName, RG_STATE_FAILED); + broadcast_event(svcName, RG_STATE_FAILED, -1, + svcStatus.rs_last_owner); return 0; } @@ -1542,8 +1566,8 @@ /* * Send a message to the target node to start the service. 
*/ -static int -relocate_service(char *svcName, int request, uint32_t target) +int +svc_start_remote(char *svcName, int request, uint32_t target) { SmMessageSt msg_relo; int msg_ret; @@ -1553,6 +1577,8 @@ /* Build the message header */ msg_relo.sm_hdr.gh_magic = GENERIC_HDR_MAGIC; msg_relo.sm_hdr.gh_command = RG_ACTION_REQUEST; + /* XXX XXX */ + msg_relo.sm_hdr.gh_arg1 = RG_ACTION_MASTER; msg_relo.sm_hdr.gh_length = sizeof (SmMessageSt); msg_relo.sm_data.d_action = request; strncpy(msg_relo.sm_data.d_svcName, svcName, @@ -1575,13 +1601,13 @@ if (msg_send(&ctx, &msg_relo, sizeof (SmMessageSt)) < sizeof (SmMessageSt)) { clulog(LOG_ERR, - "#59: Error sending relocate request to member #%d\n", + "#59: Error sending remote-start request to member #%d\n", target); msg_close(&ctx); return -1; } - clulog(LOG_DEBUG, "Sent relocate request to %d\n", (int)target); + clulog(LOG_DEBUG, "Sent remote-start request to %d\n", (int)target); /* Check the response */ do { @@ -1757,7 +1783,7 @@ * It's legal to start the service on the given * node. Try to do so. */ - if (relocate_service(svcName, request, target) == 0) { + if (svc_start_remote(svcName, request, target) == 0) { *new_owner = target; /* * Great! We're done... @@ -1787,7 +1813,7 @@ if (target == me) goto exhausted; - ret = relocate_service(svcName, request, target); + ret = svc_start_remote(svcName, request, target); switch (ret) { case RG_ERUN: /* Someone stole the service while we were @@ -2121,7 +2147,7 @@ } else if (target < 0) { goto out; } else { - ret = relocate_service(svcName, RG_START_REMOTE, + ret = svc_start_remote(svcName, RG_START_REMOTE, target); } --- cluster/rgmanager/src/daemons/rg_thread.c 2007/07/23 20:49:13 1.24 +++ cluster/rgmanager/src/daemons/rg_thread.c 2007/11/30 21:36:28 1.25 @@ -16,12 +16,12 @@ Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include #include #include #include #include #include -#include /** * Resource thread list entry. 
@@ -54,6 +54,7 @@ int rt_enqueue_request(const char *resgroupname, int request, msgctx_t *response_ctx, int max, uint32_t target, int arg0, int arg1); +int central_events_enabled(void); /** @@ -446,6 +447,11 @@ error = svc_stop(myname, RG_STOP_RECOVER); if (error == 0) { + /* Stop generates an event - whatever the + result. If central events are enabled + don't bother trying to recover */ + if (central_events_enabled()) + break; error = handle_recover_req(myname, &newowner); if (error == 0) ret = RG_SUCCESS; @@ -690,7 +696,7 @@ } else { if (max) { list_do(resgroup->rt_queue, curr) { - if (curr->rr_request == request) + if ((int)curr->rr_request == request) count++; } while (!list_done(resgroup->rt_queue, curr)); @@ -713,7 +719,7 @@ case RG_START: case RG_ENABLE: send_ret(response_ctx, resgroup->rt_name, RG_EDEADLCK, - request); + request, 0); msg_close(response_ctx); msg_free_ctx(response_ctx); break; --- cluster/rgmanager/src/daemons/test.c 2007/11/30 20:36:18 1.13 +++ cluster/rgmanager/src/daemons/test.c 2007/11/30 21:36:28 1.14 @@ -29,6 +29,7 @@ #include #include #include +#include #ifndef NO_CCS #error "Can not be built with CCS support." 
@@ -162,6 +163,7 @@ resource_t *reslist = NULL, *curres; resource_node_t *tree = NULL, *tmp, *rn = NULL; int ccsfd, ret = 0, rules = 0; + event_table_t *events = NULL; fprintf(stderr,"Running in test mode.\n"); @@ -174,6 +176,7 @@ load_resource_rules(agentpath, &rulelist); construct_domains(ccsfd, &domains); + construct_events(ccsfd, &events); construct_depends(ccsfd, &depends); load_resources(ccsfd, &reslist, &rulelist); build_resource_tree(ccsfd, &tree, &rulelist, &reslist); @@ -214,6 +217,11 @@ printf("=== Dependencies ===\n"); print_depends(stdout, &depends); } + + if (events) { + printf("=== Event Triggers ===\n"); + print_events(events); + } } ccs_unlock(ccsfd); @@ -285,6 +293,7 @@ out: deconstruct_depends(&depends); + deconstruct_events(&events); deconstruct_domains(&domains); destroy_resource_tree(&tree); destroy_resources(&reslist); /cvs/cluster/cluster/rgmanager/src/resources/default_event_script.sl,v --> standard output revision 1.1 --- cluster/rgmanager/src/resources/default_event_script.sl +++ - 2007-11-30 21:36:32.918207000 +0000 @@ -0,0 +1,291 @@ +define node_in_set(node_list, node) +{ + variable x, len; + + len = length(node_list); + for (x = 0; x < len; x++) { + if (node_list[x] == node) + return 1; + } + + return 0; +} + +define move_or_start(service, node_list) +{ + variable len; + variable state, owner; + variable depends; + + depends = service_property(service, "depend"); + if (depends != "") { + (owner, state) = service_status(depends); + if (owner < 0) { + debug(service, " is not runnable; dependency not met"); + return ERR_DEPEND; + } + } + + (owner, state) = service_status(service); + debug("Evaluating ", service, " state=", state, " owner=", owner); + + len = length(node_list); + if (len == 0) { + debug(service, " is not runnable"); + return ERR_DOMAIN; + } + + if (((event_type != EVENT_USER) and (state == "disabled")) or (state == "failed")) { + % + % Commenting out this block will -not- allow you to + % recover failed services from event 
scripts. Sorry. + % All it will get you is a false log message about + % starting this service. + % + % You may enable disabled services, but I recommend + % against it. + % + debug(service, " is not runnable"); + return -1; + } + + if (node_list[0] == owner) { + debug(service, " is already running on best node"); + return ERR_RUNNING; + } + + if ((owner >= 0) and (node_in_set(node_list, owner) == 1)) { + notice("Moving ", service, " from ", owner, + " to ", node_list); + if (service_stop(service) < 0) { + return ERR_ABORT; + } + } else { + notice("Starting ", service, " on ", node_list); + } + + return service_start(service, node_list); +} + + +% +% Returns the set of online nodes in preferred/shuffled order which +% are allowed to run this service. Gives highest preference to current +% owner if nofailback is specified. +% +define allowed_nodes(service) +{ + variable anodes; + variable online; + variable nodes_domain; + variable ordered, restricted, nofailback; + variable state, owner; + variable depends; + + (nofailback, restricted, ordered, nodes_domain) = + service_domain_info(service); + + (owner, state) = service_status(service); + + anodes = nodes_online(); + + % Shuffle the array so we don't start all services on the same + % node. TODO - add RR, Least-services, placement policies... + online = shuffle(anodes); + + if (restricted == 1) { + anodes = intersection(nodes_domain, online); + } else { + % Ordered failover domains (nodes_domain) unioned with the + % online nodes basically just reorders the online node list + % according to failover domain priority rules. + anodes = union(intersection(nodes_domain, online), + online); + } + + if ((nofailback == 1) or (ordered == 0)) { + + if ((owner < 0) or (node_in_set(anodes, owner) == 0)) { + return anodes; + } + + % Because union takes left as priority, we can + % return the union of the current owner with the + % allowed node list. This means the service will + % remain on the same node it's currently on. 
+ return union(owner, anodes); + } + + return anodes; +} + + +define default_node_event_handler() +{ + variable services = service_list(); + variable x; + variable nodes; + + % debug("Executing default node event handler"); + for (x = 0; x < length(services); x++) { + nodes = allowed_nodes(services[x]); + ()=move_or_start(services[x], nodes); + } +} + + +define default_service_event_handler() +{ + variable services = service_list(); + variable x; + variable depends; + variable policy; + variable nodes; + variable tmp; + variable owner; + variable state; + + % debug("Executing default service event handler"); + + if (service_state == "recovering") { + + policy = service_property(service_name, "recovery"); + debug("Recovering", + " Service: ", service_name, + " Last owner: ", service_last_owner, + " Policy: ", policy); + + if (policy == "disable") { + () = service_stop(service_name, 1); + return; + } + + nodes = allowed_nodes(service_name); + if (policy == "restart") { + tmp = union(service_last_owner, nodes); + } else { + % relocate + tmp = subtract(nodes, service_last_owner); + nodes = tmp; + tmp = union(nodes, service_last_owner); + } + + ()=move_or_start(service_name, nodes); + + return; + } + + for (x = 0; x < length(services); x++) { + if (service_name == services[x]) { + % don't do anything to ourself! 
+ continue; + } + + % + % Simplistic dependency handling + % + depends = service_property(services[x], "depend"); + + % No dependency; do nothing + if (depends != service_name) { + continue; + } + + (owner, state) = service_status(services[x]); + if ((service_state == "started") and (owner < 0)) { + info("Dependency met; starting ", services[x]); + nodes = allowed_nodes(services[x]); + ()=move_or_start(services[x], nodes); + } + + % service died - stop service(s) that depend on the dead + if ((service_owner < 0) and (owner >= 0)) { + info("Dependency lost; stopping ", services[x]); + ()=service_stop(services[x]); + } + } +} + +define default_config_event_handler() +{ + % debug("Executing default config event handler"); +} + +define default_user_event_handler() +{ + variable ret; + variable nodes; + variable reordered; + variable x; + variable target = user_target; + variable found = 0; + variable owner, state; + + nodes = allowed_nodes(service_name); + (owner, state) = service_status(service_name); + + if (user_request == USER_RESTART) { + + if (owner >= 0) { + reordered = union(owner, nodes); + nodes = reordered; + } + + notice("Stopping ", service_name, " for relocate to ", nodes); + + found = service_stop(service_name); + if (found < 0) { + return ERR_ABORT; + } + + ret = move_or_start(service_name, nodes); + + } else if ((user_request == USER_RELOCATE) or + (user_request == USER_ENABLE)) { + + if (user_target > 0) { + for (x = 0; x < length(nodes); x++) { + if (nodes[x] == user_target) { + reordered = union(user_target, nodes); + nodes = reordered; + found = 1; + } + } + + if (found == 0) { + warning("User specified node ", user_target, + " is offline"); + } + } + + if ((owner >= 0) and (user_request == USER_RELOCATE)) { + if (service_stop(service_name) < 0) { + return ERR_ABORT; + } + } + + ret = move_or_start(service_name, nodes); + + } else if (user_request == USER_DISABLE) { + + ret = service_stop(service_name, 1); + + } else if (user_request == USER_STOP) 
{ + + ret = service_stop(service_name); + + } + % todo - migrate + + return ret; +} + +if (event_type == EVENT_NODE) + default_node_event_handler(); +if (event_type == EVENT_SERVICE) + default_service_event_handler(); +if (event_type == EVENT_CONFIG) + default_config_event_handler(); +if (event_type == EVENT_USER) + user_return=default_user_event_handler(); + --- cluster/rgmanager/src/resources/Makefile 2007/07/12 11:25:10 1.20 +++ cluster/rgmanager/src/resources/Makefile 2007/11/30 21:36:29 1.21 @@ -22,7 +22,7 @@ mysql.metadata postgres-8.metadata tomcat-5.metadata \ named.metadata -TARGETS=${RESOURCES} ocf-shellfuncs svclib_nfslock +TARGETS=ocf-shellfuncs svclib_nfslock default_event_script.sl UTIL_TARGETS= \ utils/config-utils.sh utils/ra-skelet.sh utils/messages.sh \ @@ -36,7 +36,8 @@ echo ${sbindir} install -d ${sharedir} install -d ${sharedir}/utils - install $(TARGETS) ${sharedir} + install $(RESOURCES) ${sharedir} + install -m 644 $(TARGETS) ${sharedir} install $(UTIL_TARGETS) ${sharedir}/utils install -m 644 $(METADATA) ${sharedir}