* [Cluster-devel] cluster/rgmanager/src/resources Makefile netfs ...
@ 2007-12-04 21:59 lhh
0 siblings, 0 replies; only message in thread
From: lhh @ 2007-12-04 21:59 UTC (permalink / raw)
To: cluster-devel.redhat.com
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL5
Changes by: lhh at sourceware.org 2007-12-04 21:59:54
Modified files:
rgmanager/src/resources: Makefile netfs.sh
Added files:
rgmanager/src/resources: default_event_script.sl
Log message:
Port force-unmount from RHEL4 branch
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/default_event_script.sl.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=NONE&r2=1.1.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/Makefile.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.13.2.6&r2=1.13.2.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/netfs.sh.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.7.2.2&r2=1.7.2.3
/cvs/cluster/cluster/rgmanager/src/resources/default_event_script.sl,v --> standard output
revision 1.1.2.1
--- cluster/rgmanager/src/resources/default_event_script.sl
+++ - 2007-12-04 21:59:54.602852000 +0000
@@ -0,0 +1,291 @@
+%
+% Returns 1 if `node` equals any element of `node_list`, 0 otherwise.
+%
+define node_in_set(node_list, node)
+{
+    variable idx = 0;
+    variable count = length(node_list);
+
+    while (idx < count) {
+        if (node_list[idx] == node)
+            return 1;
+        idx++;
+    }
+
+    return 0;
+}
+
+%
+% Decide whether `service` needs to be (re)started and, if so, hand it
+% and `node_list` to service_start().
+%
+% Returns:
+%   ERR_DEPEND  - the service named by the "depend" property has owner < 0
+%   ERR_DOMAIN  - node_list is empty
+%   -1          - the service is "failed", or it is "disabled" and this is
+%                 not a user event
+%   ERR_RUNNING - the current owner is already node_list[0]
+%   ERR_ABORT   - the service had to be stopped first and service_stop()
+%                 reported an error
+%   otherwise the return value of service_start(service, node_list)
+%
+define move_or_start(service, node_list)
+{
+ variable len;
+ variable state, owner;
+ variable depends;
+
+ % Refuse to run while the service we depend on has owner < 0
+ % (presumably: is not running anywhere -- confirm against
+ % service_status()).
+ depends = service_property(service, "depend");
+ if (depends != "") {
+ (owner, state) = service_status(depends);
+ if (owner < 0) {
+ debug(service, " is not runnable; dependency not met");
+ return ERR_DEPEND;
+ }
+ }
+
+ % From here on owner/state describe `service` itself.
+ (owner, state) = service_status(service);
+ debug("Evaluating ", service, " state=", state, " owner=", owner);
+
+ % No candidate nodes at all: nothing can be started.
+ len = length(node_list);
+ if (len == 0) {
+ debug(service, " is not runnable");
+ return ERR_DOMAIN;
+ }
+
+ if (((event_type != EVENT_USER) and (state == "disabled")) or (state == "failed")) {
+ %
+ % Commenting out this block will -not- allow you to
+ % recover failed services from event scripts. Sorry.
+ % All it will get you is a false log message about
+ % starting this service.
+ %
+ % You may enable disabled services, but I recommend
+ % against it.
+ %
+ debug(service, " is not runnable");
+ return -1;
+ }
+
+ % The first entry of node_list is the preferred node.
+ if (node_list[0] == owner) {
+ debug(service, " is already running on best node");
+ return ERR_RUNNING;
+ }
+
+ % Only stop the service first when its current owner is itself one
+ % of the candidate nodes; in every other case go straight to
+ % service_start().
+ if ((owner >= 0) and (node_in_set(node_list, owner) == 1)) {
+ notice("Moving ", service, " from ", owner,
+ " to ", node_list);
+ if (service_stop(service) < 0) {
+ return ERR_ABORT;
+ }
+ } else {
+ notice("Starting ", service, " on ", node_list);
+ }
+
+ return service_start(service, node_list);
+}
+
+
+%
+% Builds the candidate node list for `service`: the online nodes, in
+% shuffled order, arranged so that members of the service's failover
+% domain come first.  For a restricted domain only domain members are
+% kept.  When nofailback is set, or the domain is unordered, a current
+% owner that is part of the result is moved to the front.
+%
+define allowed_nodes(service)
+{
+    variable nofailback, restricted, ordered, domain_nodes;
+    variable owner, state;
+    variable shuffled, candidates;
+
+    (nofailback, restricted, ordered, domain_nodes) =
+        service_domain_info(service);
+
+    (owner, state) = service_status(service);
+
+    % Shuffle the online nodes so we don't start all services on the
+    % same node.  TODO - add RR, Least-services, placement policies...
+    shuffled = shuffle(nodes_online());
+
+    % Online members of the failover domain.
+    candidates = intersection(domain_nodes, shuffled);
+
+    if (restricted != 1) {
+        % Unrestricted domain: every online node may run the service.
+        % Because union gives priority to its left argument, this just
+        % reorders the online list by failover domain priority.
+        candidates = union(candidates, shuffled);
+    }
+
+    % Ordered domain with failback allowed: priority order stands.
+    if ((nofailback != 1) and (ordered != 0))
+        return candidates;
+
+    % No current owner, or the owner is not a candidate: nothing to pin.
+    if ((owner < 0) or (node_in_set(candidates, owner) == 0))
+        return candidates;
+
+    % Put the current owner first so the service stays where it is.
+    return union(owner, candidates);
+}
+
+
+%
+% Re-evaluates the placement of every service returned by
+% service_list(): computes its allowed nodes and passes them to
+% move_or_start(), ignoring the result.
+%
+define default_node_event_handler()
+{
+    variable all_services = service_list();
+    variable count = length(all_services);
+    variable idx = 0;
+    variable candidates;
+
+    % debug("Executing default node event handler");
+    while (idx < count) {
+        candidates = allowed_nodes(all_services[idx]);
+        () = move_or_start(all_services[idx], candidates);
+        idx++;
+    }
+}
+
+
+%
+% Handles a state change of the service named by service_name.
+%
+% If that service is "recovering", its "recovery" property decides what
+% happens:
+%   "disable" - service_stop(service_name, 1), nothing is restarted
+%   "restart" - restart, trying the last owner first
+%   otherwise - relocate: try every other allowed node first and the
+%               last owner only as a last resort
+%
+% For any other state, every service whose "depend" property names
+% service_name is started (dependency now "started" and the dependent
+% has owner < 0) or stopped (dependency has service_owner < 0 while the
+% dependent still has an owner).
+%
+define default_service_event_handler()
+{
+    variable services = service_list();
+    variable x;
+    variable depends;
+    variable policy;
+    variable nodes;
+    variable tmp;
+    variable owner;
+    variable state;
+    variable last_owner_allowed;
+
+    % debug("Executing default service event handler");
+
+    if (service_state == "recovering") {
+
+        policy = service_property(service_name, "recovery");
+        debug("Recovering",
+              " Service: ", service_name,
+              " Last owner: ", service_last_owner,
+              " Policy: ", policy);
+
+        if (policy == "disable") {
+            () = service_stop(service_name, 1);
+            return;
+        }
+
+        nodes = allowed_nodes(service_name);
+
+        % Only reorder around the last owner when allowed_nodes()
+        % actually returned it; never add a node that was filtered
+        % out (or a negative "no owner" value) to the candidates.
+        last_owner_allowed = node_in_set(nodes, service_last_owner);
+
+        if (policy == "restart") {
+            % union() gives priority to its left argument, so this
+            % moves the last owner to the front of the list.
+            if (last_owner_allowed == 1) {
+                nodes = union(service_last_owner, nodes);
+            }
+        } else {
+            % relocate: every other node first, last owner at the
+            % very end so the service can still come back up when no
+            % other node is available.
+            if (last_owner_allowed == 1) {
+                tmp = subtract(nodes, service_last_owner);
+                nodes = union(tmp, service_last_owner);
+            }
+        }
+
+        % The reordered list is what must be handed on; previously
+        % it was computed into `tmp` and then discarded.
+        () = move_or_start(service_name, nodes);
+
+        return;
+    }
+
+    for (x = 0; x < length(services); x++) {
+        if (service_name == services[x]) {
+            % don't do anything to ourself!
+            continue;
+        }
+
+        %
+        % Simplistic dependency handling
+        %
+        depends = service_property(services[x], "depend");
+
+        % No dependency on the service that changed; do nothing
+        if (depends != service_name) {
+            continue;
+        }
+
+        (owner, state) = service_status(services[x]);
+        if ((service_state == "started") and (owner < 0)) {
+            info("Dependency met; starting ", services[x]);
+            nodes = allowed_nodes(services[x]);
+            () = move_or_start(services[x], nodes);
+        }
+
+        % service died - stop service(s) that depend on the dead
+        if ((service_owner < 0) and (owner >= 0)) {
+            info("Dependency lost; stopping ", services[x]);
+            () = service_stop(services[x]);
+        }
+    }
+}
+
+%
+% Config event handler.  Currently does nothing: the body holds only a
+% commented-out debug call.
+%
+define default_config_event_handler()
+{
+ % debug("Executing default config event handler");
+}
+
+%
+% Carries out the request in user_request for service_name.
+%
+% Returns ERR_ABORT when a service_stop() that must precede a start
+% fails; otherwise the result of the last move_or_start() or
+% service_stop() call made for the request.
+%
+% NOTE(review): when user_request matches none of the branches below,
+% `ret` is never assigned before `return ret` -- confirm whether that
+% can happen (see the migrate todo at the end).
+%
+define default_user_event_handler()
+{
+    variable ret;
+    variable nodes;
+    variable owner, state;
+
+    nodes = allowed_nodes(service_name);
+    (owner, state) = service_status(service_name);
+
+    if (user_request == USER_RESTART) {
+
+        % Keep the service on its current owner if it has one.
+        if (owner >= 0) {
+            nodes = union(owner, nodes);
+        }
+
+        notice("Stopping ", service_name, " for relocate to ", nodes);
+
+        if (service_stop(service_name) < 0) {
+            return ERR_ABORT;
+        }
+
+        ret = move_or_start(service_name, nodes);
+
+    } else if ((user_request == USER_RELOCATE) or
+               (user_request == USER_ENABLE)) {
+
+        if (user_target > 0) {
+            if (node_in_set(nodes, user_target) == 1) {
+                % Requested node is a candidate: try it first.
+                nodes = union(user_target, nodes);
+            } else {
+                warning("User specified node ", user_target,
+                        " is offline");
+            }
+        }
+
+        % A relocate of a running service has to stop it first.
+        if ((owner >= 0) and (user_request == USER_RELOCATE)) {
+            if (service_stop(service_name) < 0) {
+                return ERR_ABORT;
+            }
+        }
+
+        ret = move_or_start(service_name, nodes);
+
+    } else if (user_request == USER_DISABLE) {
+
+        ret = service_stop(service_name, 1);
+
+    } else if (user_request == USER_STOP) {
+
+        ret = service_stop(service_name);
+
+    }
+    % todo - migrate
+
+    return ret;
+}
+
+%
+% Script entry point: run the default handler that matches event_type.
+% Only the user handler's return value is kept; it is stored in
+% user_return.
+%
+if (event_type == EVENT_NODE)
+ default_node_event_handler();
+if (event_type == EVENT_SERVICE)
+ default_service_event_handler();
+if (event_type == EVENT_CONFIG)
+ default_config_event_handler();
+if (event_type == EVENT_USER)
+ user_return=default_user_event_handler();
+
--- cluster/rgmanager/src/resources/Makefile 2007/07/12 11:23:16 1.13.2.6
+++ cluster/rgmanager/src/resources/Makefile 2007/12/04 21:59:54 1.13.2.7
@@ -34,6 +34,9 @@
utils/httpd-parse-config.pl utils/tomcat-parse-config.pl \
utils/member_util.sh
+EVENT_TARGETS= \
+ default_event_script.sl
+
all:
install: all
@@ -44,6 +47,7 @@
install $(TARGETS) ${sharedir}
install $(UTIL_TARGETS) ${sharedir}/utils
install -m 644 $(METADATA) ${sharedir}
+ install -m 644 $(EVENT_TARGETS) ${sharedir}
uninstall:
${UNINSTALL} ${UTIL_TARGETS} ${sharedir}/utils
--- cluster/rgmanager/src/resources/netfs.sh 2007/10/03 16:44:15 1.7.2.2
+++ cluster/rgmanager/src/resources/netfs.sh 2007/12/04 21:59:54 1.7.2.3
@@ -348,6 +348,112 @@
return $NO
}
+#
+# killMountProcesses mount_point
+#
+# Try to free a busy mount point by killing off the processes that keep
+# it busy.  The processes are listed with lsof or, when lsof is not
+# installed, with fuser.
+#
+# Three passes are made over the process list: the first pass sends
+# SIGTERM, the second and third send SIGKILL.
+#
+# Returns $FAIL on a usage error, when neither lsof nor fuser can be
+# found, or when a process is still listed on the third pass; otherwise
+# returns $SUCCESS.
+#
+killMountProcesses()
+{
+    typeset -i ret=$SUCCESS
+    typeset have_lsof=""
+    typeset have_fuser=""
+    typeset try
+    # Keep the scratch variables out of the caller's scope.
+    typeset file
+    typeset command pid user
+
+    if [ $# -ne 1 ]; then
+        ocf_log err \
+            "Usage: killMountProcesses mount_point"
+        return $FAIL
+    fi
+
+    typeset mp=$1
+
+    ocf_log notice "Forcefully unmounting $mp"
+
+    #
+    # Not all distributions have lsof.  Use lsof when it is present
+    # and fall back to fuser only when it is not.
+    #
+    file=$(which lsof 2>/dev/null)
+    if [ -f "$file" ]; then
+        have_lsof=$YES
+    fi
+
+    file=$(which fuser 2>/dev/null)
+    if [ -f "$file" ]; then
+        have_fuser=$YES
+    fi
+
+    if [ -z "$have_lsof" -a -z "$have_fuser" ]; then
+        ocf_log warn \
+            "Cannot forcefully unmount $mp; cannot find lsof or fuser commands"
+        return $FAIL
+    fi
+
+    for try in 1 2 3; do
+        if [ -n "$have_lsof" ]; then
+            #
+            # Use lsof to free up mount point
+            #
+            # The loop reads from a process substitution rather than
+            # a pipe so that it runs in this shell and the assignment
+            # to ret below is kept.
+            #
+            while read command pid user
+            do
+                if [ -z "$pid" ]; then
+                    continue
+                fi
+
+                if [ "$try" -eq 1 ]; then
+                    ocf_log warn \
+                        "killing process $pid ($user $command $mp)"
+                elif [ "$try" -eq 3 ]; then
+                    # Still in use after SIGTERM and SIGKILL.
+                    ocf_log crit \
+                        "Could not clean up mountpoint $mp"
+                    ret=$FAIL
+                fi
+
+                if [ "$try" -gt 1 ]; then
+                    kill -9 "$pid"
+                else
+                    kill -TERM "$pid"
+                fi
+            done < <(lsof -w -bn 2>/dev/null | \
+                grep -w -E "$mp(/.*|)\$" | \
+                awk '{print $1,$2,$3}' | \
+                sort -u -k 1,3)
+        elif [ -n "$have_fuser" ]; then
+            #
+            # Use fuser to free up mount point
+            #
+            while read command pid user
+            do
+                if [ -z "$pid" ]; then
+                    continue
+                fi
+
+                if [ "$try" -eq 1 ]; then
+                    ocf_log warn \
+                        "killing process $pid ($user $command $mp)"
+                elif [ "$try" -eq 3 ]; then
+                    # Still in use after SIGTERM and SIGKILL.
+                    ocf_log crit \
+                        "Could not clean up mount point $mp"
+                    ret=$FAIL
+                fi
+
+                if [ "$try" -gt 1 ]; then
+                    kill -9 "$pid"
+                else
+                    kill -TERM "$pid"
+                fi
+            done < <(fuser -vm "$mp" | \
+                grep -v PID | \
+                sed 's;^'"$mp"';;' | \
+                awk '{print $4,$2,$1}' | \
+                sort -u -k 1,3)
+        fi
+    done
+
+    return $ret
+}
#
# startNFSFilesystem
@@ -498,8 +604,8 @@
#
if [ -n "$mp" ]; then
case ${OCF_RESKEY_force_unmount} in
- $YES_STR) force_umount="-f" ;;
- 0) force_umount="-f" ;;
+ $YES_STR) force_umount="$YES" ;;
+ 1) force_umount="$YES" ;;
*) force_umount="" ;;
esac
fi
@@ -507,6 +613,7 @@
#
# Unmount
#
+ while [ ! "$done" ]; do
isMounted $fullpath $mp
case $? in
$NO)
@@ -519,26 +626,46 @@
;;
$YES)
sync; sync; sync
- ocf_log info "unmounting $fullpath ($mp)"
+ ocf_log info "unmounting $mp"
- umount $force_umount $mp
+ umount $mp
if [ $? -eq 0 ]; then
- return $SUCCESS
+ umount_failed=
+ done=$YES
+ continue
fi
umount_failed=yes
+ if [ "$force_umount" ]; then
+ killMountProcesses $mp
+ fi
+
+ if [ $try -ge $max_tries ]; then
+ done=$YES
+ else
+ sleep $sleep_time
+ let try=try+1
+ fi
;;
*)
return $FAIL
;;
esac
+ if [ $try -ge $max_tries ]; then
+ done=$YES
+ else
+ sleep $sleep_time
+ let try=try+1
+ fi
+ done # while
if [ -n "$umount_failed" ]; then
ocf_log err "'umount $fullpath' failed ($mp), error=$ret_val"
return $FAIL
fi
+
return $SUCCESS
}
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2007-12-04 21:59 UTC | newest]
Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-12-04 21:59 [Cluster-devel] cluster/rgmanager/src/resources Makefile netfs lhh
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.