cluster-devel.redhat.com archive mirror
 help / color / mirror / Atom feed
From: Fabio M. Di Nitto <fdinitto@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [PATCH 2/2] checkquorum.wdmd: add integration script with wdmd
Date: Tue,  9 Oct 2012 11:36:05 +0200	[thread overview]
Message-ID: <1349775365-31526-2-git-send-email-fdinitto@redhat.com> (raw)
In-Reply-To: <1349775365-31526-1-git-send-email-fdinitto@redhat.com>

From: "Fabio M. Di Nitto" <fdinitto@redhat.com>

requires wdmd >= 2.6

Resolves: rhbz#509056

Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
---
 cman/scripts/Makefile         |    2 +-
 cman/scripts/checkquorum.wdmd |  104 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 105 insertions(+), 1 deletions(-)
 create mode 100644 cman/scripts/checkquorum.wdmd

diff --git a/cman/scripts/Makefile b/cman/scripts/Makefile
index b4866c8..7950311 100644
--- a/cman/scripts/Makefile
+++ b/cman/scripts/Makefile
@@ -1,4 +1,4 @@
-SHAREDIRTEX=checkquorum
+SHAREDIRTEX=checkquorum checkquorum.wdmd
 
 include ../../make/defines.mk
 include $(OBJDIR)/make/clean.mk
diff --git a/cman/scripts/checkquorum.wdmd b/cman/scripts/checkquorum.wdmd
new file mode 100644
index 0000000..1d81ff6
--- /dev/null
+++ b/cman/scripts/checkquorum.wdmd
@@ -0,0 +1,104 @@
+#!/bin/bash
+# Quorum detection watchdog script
+#
+# This script will fail (exit non-zero or trigger the configured action)
+# if the node had quorum at one point and then subsequently lost it
+#
+# Copyright 2012 Red Hat, Inc.
+
+# defaults
+
+# Amount of time in seconds to wait after quorum is lost to fail script
+waittime=60
+
+# action to take if quorum is missing for over > waittime
+# autodetect|hardreboot|crashdump|watchdog
+action=autodetect
+
+# Location of temporary file to capture timeouts
+timerfile="/var/run/cluster/checkquorum-timer"
+
+# rpm based distros
+[ -d /etc/sysconfig ] && \
+	[ -f /etc/sysconfig/checkquorum ] && \
+	. /etc/sysconfig/checkquorum
+
+# deb based distros
+[ ! -d /etc/sysconfig ] && \
+	[ -f /etc/default/checkquorum ] && \
+	. /etc/default/checkquorum
+
+has_quorum() {
+	# Succeeds only if the status output has a "Quorate:" line ending in "Yes".
+	corosync-quorumtool -s 2>/dev/null |
+		grep '^Quorate:' | grep -q 'Yes$'
+}
+
+had_quorum() {
+	output="$(corosync-objctl 2>/dev/null | \
+		grep runtime.totem.pg.mrp.srp.operational_entered | cut -d "=" -f 2)"
+	[ -n "$output" ] && {
+		[ "$output" -ge 1 ] && return 0
+		return 1
+	}
+}
+
+take_action() { # carry out the response selected by $action
+	case "$action" in
+		watchdog) # only report failure; nothing is written to sysrq here
+			[ -n "$wdmd_action" ] && return 1
+			;;
+		hardreboot) # sysrq 'b': reboot right away
+			echo 1 > /proc/sys/kernel/sysrq # enable sysrq
+			echo b > /proc/sysrq-trigger
+			;;
+		crashdump) # sysrq 'c': crash the kernel
+			echo 1 > /proc/sys/kernel/sysrq # enable sysrq
+			echo c > /proc/sysrq-trigger
+			;;
+		autodetect) # crash if the kdump service reports OK, else reboot
+			service kdump status > /dev/null 2>&1
+			usekexec="$?" # 0 when "service kdump status" succeeded
+			[ -n "$wdmd_action" ] && [ "$usekexec" != "0" ] && return 1 # under wdmd without kdump: just fail
+			echo 1 > /proc/sys/kernel/sysrq # enable sysrq
+			[ "$usekexec" = "0" ] && echo c > /proc/sysrq-trigger # kdump is up: crash
+			echo b > /proc/sysrq-trigger # otherwise reboot
+	esac
+}
+
+# watchdog uses $1 = test or = repair
+# with no arguments we are called by wdmd
+[ -z "$1" ] && wdmd_action=yes
+
+# we don't support watchdog repair action
+[ "$1" = "repair" ] && exit 1
+
+service corosync status > /dev/null 2>&1
+ret=$?
+
+case "$ret" in
+	3) # corosync is not running (clean)
+		rm -f "$timerfile"
+		exit 0
+		;;
+	1) # corosync crashed or did exit abonormally (dirty - take action)
+		logger -t checkquorum.wdmd "corosync crashed or exited abonarmally. Node will soon reboot"
+		take_action
+		;;
+	0) # corosync is running (clean)
+		# check quorum here
+		has_quorum && {
+			echo -e "oldtime=$(date +%s)" > "$timerfile"
+			exit 0
+		}
+		. "$timerfile"
+		newtime="$(date +%s)" 
+		delta=$((newtime - oldtime))
+		logger -t checkquorum.wdmd "Node has lost quorum. Node will soon reboot"
+		had_quorum && [ "$delta" -gt "$waittime" ] && {
+			take_action
+		}
+		;;
+esac
+
+exit $?
-- 
1.7.7.6



  reply	other threads:[~2012-10-09  9:36 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-10-09  9:36 [Cluster-devel] [PATCH 1/2] cman init: make sure we start after fence_sanlockd and warn users Fabio M. Di Nitto
2012-10-09  9:36 ` Fabio M. Di Nitto [this message]
2012-10-10  4:26   ` [Cluster-devel] [PATCH 2/2] checkquorum.wdmd: add integration script with wdmd Dietmar Maurer
2012-10-10  6:59     ` Fabio M. Di Nitto
2012-10-10  8:06       ` Dietmar Maurer
2012-10-10  8:11         ` Fabio M. Di Nitto
2012-10-10  8:15           ` Dietmar Maurer
2012-10-10 11:04           ` Heiko Nardmann
2012-10-10 11:14             ` Fabio M. Di Nitto
2012-10-10  4:33   ` Dietmar Maurer
2012-10-10  7:06     ` Fabio M. Di Nitto
2012-10-10  8:10       ` Dietmar Maurer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1349775365-31526-2-git-send-email-fdinitto@redhat.com \
    --to=fdinitto@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).