From mboxrd@z Thu Jan 1 00:00:00 1970 From: sbradley@redhat.com Date: Thu, 31 Jan 2013 09:41:30 -0500 Subject: [Cluster-devel] [PATCH] gfs2_lockcapture: Capture the status of the cluster nodes and find the clusternode name and id. Message-ID: <1359643290-21777-1-git-send-email-sbradley@redhat.com> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit From: Shane Bradley The status of the cluster nodes will be captured and written to a file, using the command that matches the installed cluster version: "cman_tool status" or "corosync-quorumtool -l". Two new variables, NODE_NAME and NODE_ID, are appended to hostinformation.txt for the clusternode name and id. Signed-off-by: Shane Bradley --- gfs2/scripts/gfs2_lockcapture | 102 +++++++++++++++++++++++++++++++----------- 1 file changed, 76 insertions(+), 26 deletions(-) diff --git a/gfs2/scripts/gfs2_lockcapture b/gfs2/scripts/gfs2_lockcapture index 2b3421c..6a63fc8 100644 --- a/gfs2/scripts/gfs2_lockcapture +++ b/gfs2/scripts/gfs2_lockcapture @@ -45,12 +45,15 @@ class ClusterNode: """ This class represents a cluster node that is a current memeber in a cluster. """ - def __init__(self, clusternodeName, clusterName, mapOfMountedFilesystemLabels): + def __init__(self, clusternodeName, clusternodeID, clusterName, mapOfMountedFilesystemLabels): """ @param clusternodeName: The name of the cluster node. @type clusternodeName: String @param clusterName: The name of the cluster that this cluster node is a member of. + @param clusternodeID: The id of the cluster node. + @type clusternodeID: Int + @param clusterName: The name of the cluster that this cluster node is a @type clusterName: String @param mapOfMountedFilesystemLabels: A map of filesystem labels(key) for a mounted filesystem. 
The value is the line for the matching mounted @@ -58,6 +61,7 @@ class ClusterNode: @type mapOfMountedFilesystemLabels: Dict """ self.__clusternodeName = clusternodeName + self.__clusternodeID = clusternodeID self.__clusterName = clusterName self.__mapOfMountedFilesystemLabels = mapOfMountedFilesystemLabels @@ -69,7 +73,7 @@ class ClusterNode: @rtype: String """ rString = "" - rString += "%s:%s" %(self.getClusterName(), self.getClusterNodeName()) + rString += "%s:%s(id:%d)" %(self.getClusterName(), self.getClusterNodeName(), self.getClusterNodeID()) fsLabels = self.__mapOfMountedFilesystemLabels.keys() fsLabels.sort() for fsLabel in fsLabels: @@ -85,6 +89,14 @@ class ClusterNode: """ return self.__clusternodeName + def getClusterNodeID(self): + """ + Returns the id of the cluster node. + @return: Returns the id of the cluster node. + @rtype: String + """ + return self.__clusternodeID + def getClusterName(self): """ Returns the name of cluster that this cluster node is a member of. @@ -539,6 +551,7 @@ def getClusterNode(listOfGFS2Names): # in the output, else return None. clusterName = "" clusternodeName = "" + clusternodeID = "" if (runCommand("which", ["cman_tool"])): stdout = runCommandOutput("cman_tool", ["status"]) if (not stdout == None): @@ -550,6 +563,8 @@ def getClusterNode(listOfGFS2Names): clusterName = line.split("Cluster Name:")[1].strip().rstrip() if (line.startswith("Node name: ")): clusternodeName = line.split("Node name:")[1].strip().rstrip() + if (line.startswith("Node ID: ")): + clusternodeID = line.split("Node ID: ")[1].strip().rstrip() elif (runCommand("which", ["corosync-cmapctl"])): # Another way to get the local cluster node is: $ crm_node -i; crm_node -l # Get the name of the cluster. 
@@ -559,14 +574,14 @@ def getClusterNode(listOfGFS2Names): if (len(stdoutSplit) == 2): clusterName = stdoutSplit[1].strip().rstrip() # Get the id of the local cluster node so we can get the clusternode name - thisNodeID = "" + clusternodeID = "" stdout = runCommandOutput("corosync-cmapctl", ["-g", "runtime.votequorum.this_node_id"]) if (not stdout == None): stdoutSplit = stdout.split("=") if (len(stdoutSplit) == 2): - thisNodeID = stdoutSplit[1].strip().rstrip() + clusternodeID = stdoutSplit[1].strip().rstrip() # Now that we the nodeid then we can get the clusternode name. - if (len(thisNodeID) > 0): + if (len(clusternodeID) > 0): stdout = runCommandOutput("corosync-quorumtool", ["-l"]) if (not stdout == None): for line in stdout.split("\n"): @@ -588,7 +603,15 @@ def getClusterNode(listOfGFS2Names): break if ((not foundMatch) and (mapOfMountedFilesystemLabels.has_key(label))): del(mapOfMountedFilesystemLabels[label]) - return ClusterNode(clusternodeName, clusterName, mapOfMountedFilesystemLabels) + # Cast the node id to an int, and default is 0 if node is not found or + # not castable. + clusternodeIDInt = 0 + if (clusternodeID.isalnum()): + try: + clusternodeIDInt = int(clusternodeID) + except(ValueError): + pass + return ClusterNode(clusternodeName, clusternodeIDInt, clusterName, mapOfMountedFilesystemLabels) else: return None @@ -701,6 +724,28 @@ def gatherGeneralInformation(pathToDSTDir): message = "There was an error the command output for %s to the file %s." %(command, pathToCommandOutput) logging.getLogger(MAIN_LOGGER_NAME).error(message) + # Write the status of all the nodes in the cluster out. + if (runCommand("which", ["cman_tool"])): + command = "cman_tool" + pathToCommandOutput = os.path.join(pathToDSTDir, "cman_tool_status") + try: + fout = open(pathToCommandOutput, "w") + runCommand(command, ["status"], standardOut=fout) + fout.close() + except IOError: + message = "There was an error the command output for %s to the file %s." 
%(command, pathToCommandOutput) + logging.getLogger(MAIN_LOGGER_NAME).error(message) + elif (runCommand("which", ["corosync-cmapctl"])): + command = "corosync-quorumtool" + pathToCommandOutput = os.path.join(pathToDSTDir, "corosync-quorumtool_l") + try: + fout = open(pathToCommandOutput, "w") + runCommand(command, ["-l"], standardOut=fout) + fout.close() + except IOError: + message = "There was an error the command output for %s to the file %s." %(command, pathToCommandOutput) + logging.getLogger(MAIN_LOGGER_NAME).error(message) + def isProcPidStackEnabled(pathToPidData): """ @@ -1067,26 +1112,6 @@ if __name__ == "__main__": # script running. writeToFile(PATH_TO_PID_FILENAME, str(os.getpid()), createFile=True) # ####################################################################### - # Verify they want to continue because this script will trigger sysrq events. - # ####################################################################### - if (not cmdLineOpts.disableQuestions): - valid = {"yes":True, "y":True, "no":False, "n":False} - question = "This script will trigger a sysrq -t event or collect the data for each pid directory located in /proc for each run. Are you sure you want to continue?" - prompt = " [y/n] " - while True: - sys.stdout.write(question + prompt) - choice = raw_input().lower() - if (choice in valid): - if (valid.get(choice)): - # If yes, or y then exit loop and continue. - break - else: - message = "The script will not continue since you chose not to continue." - logging.getLogger(MAIN_LOGGER_NAME).error(message) - exitScript(removePidFile=True, errorCode=1) - else: - sys.stdout.write("Please respond with '(y)es' or '(n)o'.\n") - # ####################################################################### # Get the clusternode name and verify that mounted GFS2 filesystems were # found. 
# ####################################################################### @@ -1110,6 +1135,26 @@ if __name__ == "__main__": print clusternode exitScript() # ####################################################################### + # Verify they want to continue because this script will trigger sysrq events. + # ####################################################################### + if (not cmdLineOpts.disableQuestions): + valid = {"yes":True, "y":True, "no":False, "n":False} + question = "This script will trigger a sysrq -t event or collect the data for each pid directory located in /proc for each run. Are you sure you want to continue?" + prompt = " [y/n] " + while True: + sys.stdout.write(question + prompt) + choice = raw_input().lower() + if (choice in valid): + if (valid.get(choice)): + # If yes, or y then exit loop and continue. + break + else: + message = "The script will not continue since you chose not to continue." + logging.getLogger(MAIN_LOGGER_NAME).error(message) + exitScript(removePidFile=True, errorCode=1) + else: + sys.stdout.write("Please respond with '(y)es' or '(n)o'.\n") + # ####################################################################### # Create the output directory to verify it can be created before # proceeding unless it is already created from a previous run data needs # to be analyzed. Probably could add more debugging on if file or dir. @@ -1178,6 +1223,11 @@ if __name__ == "__main__": message = "Pass (%d/%d): Gathering general information about the host." %(i, cmdLineOpts.numberOfRuns) logging.getLogger(MAIN_LOGGER_NAME).debug(message) gatherGeneralInformation(pathToOutputRunDir) + # Write the clusternode name and id to the general information file. 
+ writeToFile(os.path.join(pathToOutputRunDir, "hostinformation.txt"), + "NODE_NAME=%s\nNODE_ID=%d" %(clusternode.getClusterNodeName(), clusternode.getClusterNodeID()), + appendToFile=True, createFile=True) + # Going to sleep for 2 seconds, so that TIMESTAMP should be in the # past in the logs so that capturing sysrq data will be guaranteed. time.sleep(2) -- 1.8.0.2