cluster-devel.redhat.com archive mirror
 help / color / mirror / Atom feed
* [Cluster-devel] [PATCH] gfs2_lockcapture: Capture the status of the cluster nodes and find the clusternode name and id.
@ 2013-01-31 14:41 sbradley
  2013-02-05 13:23 ` Steven Whitehouse
  0 siblings, 1 reply; 2+ messages in thread
From: sbradley @ 2013-01-31 14:41 UTC (permalink / raw)
  To: cluster-devel.redhat.com

From: Shane Bradley <sbradley@redhat.com>

The status of the cluster nodes is now captured and written to a file, using
the command that matches the installed cluster stack: cman_tool nodes or
corosync-quorumtool -l. Two new configuration variables, for the clusternode
name and id, are also added to hostinformation.txt.

Signed-off-by: Shane Bradley <sbradley@redhat.com>
---
 gfs2/scripts/gfs2_lockcapture | 102 +++++++++++++++++++++++++++++++-----------
 1 file changed, 76 insertions(+), 26 deletions(-)

diff --git a/gfs2/scripts/gfs2_lockcapture b/gfs2/scripts/gfs2_lockcapture
index 2b3421c..6a63fc8 100644
--- a/gfs2/scripts/gfs2_lockcapture
+++ b/gfs2/scripts/gfs2_lockcapture
@@ -45,12 +45,15 @@ class ClusterNode:
     """
     This class represents a cluster node that is a current memeber in a cluster.
     """
-    def __init__(self, clusternodeName, clusterName, mapOfMountedFilesystemLabels):
+    def __init__(self, clusternodeName, clusternodeID, clusterName, mapOfMountedFilesystemLabels):
         """
         @param clusternodeName: The name of the cluster node.
         @type clusternodeName: String
         @param clusterName: The name of the cluster that this cluster node is a
         member of.
+        @param clusternodeID: The id of the cluster node.
+        @type clusternodeID: Int
+        @param clusterName: The name of the cluster that this cluster node is a
         @type clusterName: String
         @param mapOfMountedFilesystemLabels: A map of filesystem labels(key) for
         a mounted filesystem. The value is the line for the matching mounted
@@ -58,6 +61,7 @@ class ClusterNode:
         @type mapOfMountedFilesystemLabels: Dict
         """
         self.__clusternodeName = clusternodeName
+        self.__clusternodeID  = clusternodeID
         self.__clusterName = clusterName
         self.__mapOfMountedFilesystemLabels = mapOfMountedFilesystemLabels
 
@@ -69,7 +73,7 @@ class ClusterNode:
         @rtype: String
         """
         rString = ""
-        rString += "%s:%s" %(self.getClusterName(), self.getClusterNodeName())
+        rString += "%s:%s(id:%d)" %(self.getClusterName(), self.getClusterNodeName(), self.getClusterNodeID())
         fsLabels = self.__mapOfMountedFilesystemLabels.keys()
         fsLabels.sort()
         for fsLabel in fsLabels:
@@ -85,6 +89,14 @@ class ClusterNode:
         """
         return self.__clusternodeName
 
+    def getClusterNodeID(self):
+        """
+        Returns the id of the cluster node.
+        @return: Returns the id of the cluster node.
+        @rtype: Int
+        """
+        return self.__clusternodeID
+
     def getClusterName(self):
         """
         Returns the name of cluster that this cluster node is a member of.
@@ -539,6 +551,7 @@ def getClusterNode(listOfGFS2Names):
     # in the output, else return None.
     clusterName = ""
     clusternodeName = ""
+    clusternodeID = ""
     if (runCommand("which", ["cman_tool"])):
         stdout = runCommandOutput("cman_tool", ["status"])
         if (not stdout == None):
@@ -550,6 +563,8 @@ def getClusterNode(listOfGFS2Names):
                     clusterName = line.split("Cluster Name:")[1].strip().rstrip()
                 if (line.startswith("Node name: ")):
                     clusternodeName = line.split("Node name:")[1].strip().rstrip()
+                if (line.startswith("Node ID: ")):
+                    clusternodeID = line.split("Node ID: ")[1].strip().rstrip()
     elif (runCommand("which", ["corosync-cmapctl"])):
         # Another way to get the local cluster node is: $ crm_node -i; crm_node -l
         # Get the name of the cluster.
@@ -559,14 +574,14 @@ def getClusterNode(listOfGFS2Names):
             if (len(stdoutSplit) == 2):
                 clusterName = stdoutSplit[1].strip().rstrip()
         # Get the id of the local cluster node so we can get the clusternode name
-        thisNodeID = ""
+        clusternodeID = ""
         stdout = runCommandOutput("corosync-cmapctl", ["-g", "runtime.votequorum.this_node_id"])
         if (not stdout == None):
             stdoutSplit = stdout.split("=")
             if (len(stdoutSplit) == 2):
-               thisNodeID = stdoutSplit[1].strip().rstrip()
+               clusternodeID = stdoutSplit[1].strip().rstrip()
         # Now that we the nodeid then we can get the clusternode name.
-        if (len(thisNodeID) > 0):
+        if (len(clusternodeID) > 0):
             stdout = runCommandOutput("corosync-quorumtool", ["-l"])
             if (not stdout == None):
                 for line in stdout.split("\n"):
@@ -588,7 +603,15 @@ def getClusterNode(listOfGFS2Names):
                         break
                 if ((not foundMatch) and (mapOfMountedFilesystemLabels.has_key(label))):
                     del(mapOfMountedFilesystemLabels[label])
-        return ClusterNode(clusternodeName, clusterName, mapOfMountedFilesystemLabels)
+        # Cast the node id to an int, and default is 0 if node is not found or
+        # not castable.
+        clusternodeIDInt = 0
+        if (clusternodeID.isalnum()):
+            try:
+                clusternodeIDInt = int(clusternodeID)
+            except(ValueError):
+                pass
+        return ClusterNode(clusternodeName, clusternodeIDInt, clusterName, mapOfMountedFilesystemLabels)
     else:
         return None
 
@@ -701,6 +724,28 @@ def gatherGeneralInformation(pathToDSTDir):
         message = "There was an error the command output for %s to the file %s." %(command, pathToCommandOutput)
         logging.getLogger(MAIN_LOGGER_NAME).error(message)
 
+    # Write the status of all the nodes in the cluster out.
+    if (runCommand("which", ["cman_tool"])):
+        command = "cman_tool"
+        pathToCommandOutput = os.path.join(pathToDSTDir, "cman_tool_status")
+        try:
+            fout = open(pathToCommandOutput, "w")
+            runCommand(command, ["status"], standardOut=fout)
+            fout.close()
+        except IOError:
+            message = "There was an error the command output for %s to the file %s." %(command, pathToCommandOutput)
+            logging.getLogger(MAIN_LOGGER_NAME).error(message)
+    elif (runCommand("which", ["corosync-cmapctl"])):
+        command = "corosync-quorumtool"
+        pathToCommandOutput = os.path.join(pathToDSTDir, "corosync-quorumtool_l")
+        try:
+            fout = open(pathToCommandOutput, "w")
+            runCommand(command, ["-l"], standardOut=fout)
+            fout.close()
+        except IOError:
+            message = "There was an error the command output for %s to the file %s." %(command, pathToCommandOutput)
+            logging.getLogger(MAIN_LOGGER_NAME).error(message)
+
 
 def isProcPidStackEnabled(pathToPidData):
     """
@@ -1067,26 +1112,6 @@ if __name__ == "__main__":
             # script running.
             writeToFile(PATH_TO_PID_FILENAME, str(os.getpid()), createFile=True)
         # #######################################################################
-        # Verify they want to continue because this script will trigger sysrq events.
-        # #######################################################################
-        if (not cmdLineOpts.disableQuestions):
-            valid = {"yes":True, "y":True, "no":False, "n":False}
-            question = "This script will trigger a sysrq -t event or collect the data for each pid directory located in /proc for each run. Are you sure you want to continue?"
-            prompt = " [y/n] "
-            while True:
-                sys.stdout.write(question + prompt)
-                choice = raw_input().lower()
-                if (choice in valid):
-                    if (valid.get(choice)):
-                        # If yes, or y then exit loop and continue.
-                        break
-                    else:
-                        message = "The script will not continue since you chose not to continue."
-                        logging.getLogger(MAIN_LOGGER_NAME).error(message)
-                        exitScript(removePidFile=True, errorCode=1)
-                else:
-                    sys.stdout.write("Please respond with '(y)es' or '(n)o'.\n")
-        # #######################################################################
         # Get the clusternode name and verify that mounted GFS2 filesystems were
         # found.
         # #######################################################################
@@ -1110,6 +1135,26 @@ if __name__ == "__main__":
             print clusternode
             exitScript()
         # #######################################################################
+        # Verify they want to continue because this script will trigger sysrq events.
+        # #######################################################################
+        if (not cmdLineOpts.disableQuestions):
+            valid = {"yes":True, "y":True, "no":False, "n":False}
+            question = "This script will trigger a sysrq -t event or collect the data for each pid directory located in /proc for each run. Are you sure you want to continue?"
+            prompt = " [y/n] "
+            while True:
+                sys.stdout.write(question + prompt)
+                choice = raw_input().lower()
+                if (choice in valid):
+                    if (valid.get(choice)):
+                        # If yes, or y then exit loop and continue.
+                        break
+                    else:
+                        message = "The script will not continue since you chose not to continue."
+                        logging.getLogger(MAIN_LOGGER_NAME).error(message)
+                        exitScript(removePidFile=True, errorCode=1)
+                else:
+                    sys.stdout.write("Please respond with '(y)es' or '(n)o'.\n")
+        # #######################################################################
         # Create the output directory to verify it can be created before
         # proceeding unless it is already created from a previous run data needs
         # to be analyzed. Probably could add more debugging on if file or dir.
@@ -1178,6 +1223,11 @@ if __name__ == "__main__":
             message = "Pass (%d/%d): Gathering general information about the host." %(i, cmdLineOpts.numberOfRuns)
             logging.getLogger(MAIN_LOGGER_NAME).debug(message)
             gatherGeneralInformation(pathToOutputRunDir)
+            # Write the clusternode name and id to the general information file.
+            writeToFile(os.path.join(pathToOutputRunDir, "hostinformation.txt"),
+                        "NODE_NAME=%s\nNODE_ID=%d" %(clusternode.getClusterNodeName(), clusternode.getClusterNodeID()),
+                        appendToFile=True, createFile=True)
+
             # Going to sleep for 2 seconds, so that TIMESTAMP should be in the
             # past in the logs so that capturing sysrq data will be guaranteed.
             time.sleep(2)
-- 
1.8.0.2



^ permalink raw reply related	[flat|nested] 2+ messages in thread

* [Cluster-devel] [PATCH] gfs2_lockcapture: Capture the status of the cluster nodes and find the clusternode name and id.
  2013-01-31 14:41 [Cluster-devel] [PATCH] gfs2_lockcapture: Capture the status of the cluster nodes and find the clusternode name and id sbradley
@ 2013-02-05 13:23 ` Steven Whitehouse
  0 siblings, 0 replies; 2+ messages in thread
From: Steven Whitehouse @ 2013-02-05 13:23 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

Looks ok to me, so long as it does what you want it to do,

Steve.

On Thu, 2013-01-31 at 09:41 -0500, sbradley at redhat.com wrote:
> From: Shane Bradley <sbradley@redhat.com>
> 
> The status of the cluster will be captured and written to the file with respect
> to version: cman_tool nodes, corosync-quorumtool -l. Added two new configuration
> variables to the hostinformation.txt for the clusternode name and id.
> 
> Signed-off-by: Shane Bradley <sbradley@redhat.com>
> ---
>  gfs2/scripts/gfs2_lockcapture | 102 +++++++++++++++++++++++++++++++-----------
>  1 file changed, 76 insertions(+), 26 deletions(-)
> 
> diff --git a/gfs2/scripts/gfs2_lockcapture b/gfs2/scripts/gfs2_lockcapture
> index 2b3421c..6a63fc8 100644
> --- a/gfs2/scripts/gfs2_lockcapture
> +++ b/gfs2/scripts/gfs2_lockcapture
> @@ -45,12 +45,15 @@ class ClusterNode:
>      """
>      This class represents a cluster node that is a current memeber in a cluster.
>      """
> -    def __init__(self, clusternodeName, clusterName, mapOfMountedFilesystemLabels):
> +    def __init__(self, clusternodeName, clusternodeID, clusterName, mapOfMountedFilesystemLabels):
>          """
>          @param clusternodeName: The name of the cluster node.
>          @type clusternodeName: String
>          @param clusterName: The name of the cluster that this cluster node is a
>          member of.
> +        @param clusternodeID: The id of the cluster node.
> +        @type clusternodeID: Int
> +        @param clusterName: The name of the cluster that this cluster node is a
>          @type clusterName: String
>          @param mapOfMountedFilesystemLabels: A map of filesystem labels(key) for
>          a mounted filesystem. The value is the line for the matching mounted
> @@ -58,6 +61,7 @@ class ClusterNode:
>          @type mapOfMountedFilesystemLabels: Dict
>          """
>          self.__clusternodeName = clusternodeName
> +        self.__clusternodeID  = clusternodeID
>          self.__clusterName = clusterName
>          self.__mapOfMountedFilesystemLabels = mapOfMountedFilesystemLabels
>  
> @@ -69,7 +73,7 @@ class ClusterNode:
>          @rtype: String
>          """
>          rString = ""
> -        rString += "%s:%s" %(self.getClusterName(), self.getClusterNodeName())
> +        rString += "%s:%s(id:%d)" %(self.getClusterName(), self.getClusterNodeName(), self.getClusterNodeID())
>          fsLabels = self.__mapOfMountedFilesystemLabels.keys()
>          fsLabels.sort()
>          for fsLabel in fsLabels:
> @@ -85,6 +89,14 @@ class ClusterNode:
>          """
>          return self.__clusternodeName
>  
> +    def getClusterNodeID(self):
> +        """
> +        Returns the id of the cluster node.
> +        @return: Returns the id of the cluster node.
> +        @rtype: String
> +        """
> +        return self.__clusternodeID
> +
>      def getClusterName(self):
>          """
>          Returns the name of cluster that this cluster node is a member of.
> @@ -539,6 +551,7 @@ def getClusterNode(listOfGFS2Names):
>      # in the output, else return None.
>      clusterName = ""
>      clusternodeName = ""
> +    clusternodeID = ""
>      if (runCommand("which", ["cman_tool"])):
>          stdout = runCommandOutput("cman_tool", ["status"])
>          if (not stdout == None):
> @@ -550,6 +563,8 @@ def getClusterNode(listOfGFS2Names):
>                      clusterName = line.split("Cluster Name:")[1].strip().rstrip()
>                  if (line.startswith("Node name: ")):
>                      clusternodeName = line.split("Node name:")[1].strip().rstrip()
> +                if (line.startswith("Node ID: ")):
> +                    clusternodeID = line.split("Node ID: ")[1].strip().rstrip()
>      elif (runCommand("which", ["corosync-cmapctl"])):
>          # Another way to get the local cluster node is: $ crm_node -i; crm_node -l
>          # Get the name of the cluster.
> @@ -559,14 +574,14 @@ def getClusterNode(listOfGFS2Names):
>              if (len(stdoutSplit) == 2):
>                  clusterName = stdoutSplit[1].strip().rstrip()
>          # Get the id of the local cluster node so we can get the clusternode name
> -        thisNodeID = ""
> +        clusternodeID = ""
>          stdout = runCommandOutput("corosync-cmapctl", ["-g", "runtime.votequorum.this_node_id"])
>          if (not stdout == None):
>              stdoutSplit = stdout.split("=")
>              if (len(stdoutSplit) == 2):
> -               thisNodeID = stdoutSplit[1].strip().rstrip()
> +               clusternodeID = stdoutSplit[1].strip().rstrip()
>          # Now that we the nodeid then we can get the clusternode name.
> -        if (len(thisNodeID) > 0):
> +        if (len(clusternodeID) > 0):
>              stdout = runCommandOutput("corosync-quorumtool", ["-l"])
>              if (not stdout == None):
>                  for line in stdout.split("\n"):
> @@ -588,7 +603,15 @@ def getClusterNode(listOfGFS2Names):
>                          break
>                  if ((not foundMatch) and (mapOfMountedFilesystemLabels.has_key(label))):
>                      del(mapOfMountedFilesystemLabels[label])
> -        return ClusterNode(clusternodeName, clusterName, mapOfMountedFilesystemLabels)
> +        # Cast the node id to an int, and default is 0 if node is not found or
> +        # not castable.
> +        clusternodeIDInt = 0
> +        if (clusternodeID.isalnum()):
> +            try:
> +                clusternodeIDInt = int(clusternodeID)
> +            except(ValueError):
> +                pass
> +        return ClusterNode(clusternodeName, clusternodeIDInt, clusterName, mapOfMountedFilesystemLabels)
>      else:
>          return None
>  
> @@ -701,6 +724,28 @@ def gatherGeneralInformation(pathToDSTDir):
>          message = "There was an error the command output for %s to the file %s." %(command, pathToCommandOutput)
>          logging.getLogger(MAIN_LOGGER_NAME).error(message)
>  
> +    # Write the status of all the nodes in the cluster out.
> +    if (runCommand("which", ["cman_tool"])):
> +        command = "cman_tool"
> +        pathToCommandOutput = os.path.join(pathToDSTDir, "cman_tool_status")
> +        try:
> +            fout = open(pathToCommandOutput, "w")
> +            runCommand(command, ["status"], standardOut=fout)
> +            fout.close()
> +        except IOError:
> +            message = "There was an error the command output for %s to the file %s." %(command, pathToCommandOutput)
> +            logging.getLogger(MAIN_LOGGER_NAME).error(message)
> +    elif (runCommand("which", ["corosync-cmapctl"])):
> +        command = "corosync-quorumtool"
> +        pathToCommandOutput = os.path.join(pathToDSTDir, "corosync-quorumtool_l")
> +        try:
> +            fout = open(pathToCommandOutput, "w")
> +            runCommand(command, ["-l"], standardOut=fout)
> +            fout.close()
> +        except IOError:
> +            message = "There was an error the command output for %s to the file %s." %(command, pathToCommandOutput)
> +            logging.getLogger(MAIN_LOGGER_NAME).error(message)
> +
>  
>  def isProcPidStackEnabled(pathToPidData):
>      """
> @@ -1067,26 +1112,6 @@ if __name__ == "__main__":
>              # script running.
>              writeToFile(PATH_TO_PID_FILENAME, str(os.getpid()), createFile=True)
>          # #######################################################################
> -        # Verify they want to continue because this script will trigger sysrq events.
> -        # #######################################################################
> -        if (not cmdLineOpts.disableQuestions):
> -            valid = {"yes":True, "y":True, "no":False, "n":False}
> -            question = "This script will trigger a sysrq -t event or collect the data for each pid directory located in /proc for each run. Are you sure you want to continue?"
> -            prompt = " [y/n] "
> -            while True:
> -                sys.stdout.write(question + prompt)
> -                choice = raw_input().lower()
> -                if (choice in valid):
> -                    if (valid.get(choice)):
> -                        # If yes, or y then exit loop and continue.
> -                        break
> -                    else:
> -                        message = "The script will not continue since you chose not to continue."
> -                        logging.getLogger(MAIN_LOGGER_NAME).error(message)
> -                        exitScript(removePidFile=True, errorCode=1)
> -                else:
> -                    sys.stdout.write("Please respond with '(y)es' or '(n)o'.\n")
> -        # #######################################################################
>          # Get the clusternode name and verify that mounted GFS2 filesystems were
>          # found.
>          # #######################################################################
> @@ -1110,6 +1135,26 @@ if __name__ == "__main__":
>              print clusternode
>              exitScript()
>          # #######################################################################
> +        # Verify they want to continue because this script will trigger sysrq events.
> +        # #######################################################################
> +        if (not cmdLineOpts.disableQuestions):
> +            valid = {"yes":True, "y":True, "no":False, "n":False}
> +            question = "This script will trigger a sysrq -t event or collect the data for each pid directory located in /proc for each run. Are you sure you want to continue?"
> +            prompt = " [y/n] "
> +            while True:
> +                sys.stdout.write(question + prompt)
> +                choice = raw_input().lower()
> +                if (choice in valid):
> +                    if (valid.get(choice)):
> +                        # If yes, or y then exit loop and continue.
> +                        break
> +                    else:
> +                        message = "The script will not continue since you chose not to continue."
> +                        logging.getLogger(MAIN_LOGGER_NAME).error(message)
> +                        exitScript(removePidFile=True, errorCode=1)
> +                else:
> +                    sys.stdout.write("Please respond with '(y)es' or '(n)o'.\n")
> +        # #######################################################################
>          # Create the output directory to verify it can be created before
>          # proceeding unless it is already created from a previous run data needs
>          # to be analyzed. Probably could add more debugging on if file or dir.
> @@ -1178,6 +1223,11 @@ if __name__ == "__main__":
>              message = "Pass (%d/%d): Gathering general information about the host." %(i, cmdLineOpts.numberOfRuns)
>              logging.getLogger(MAIN_LOGGER_NAME).debug(message)
>              gatherGeneralInformation(pathToOutputRunDir)
> +            # Write the clusternode name and id to the general information file.
> +            writeToFile(os.path.join(pathToOutputRunDir, "hostinformation.txt"),
> +                        "NODE_NAME=%s\nNODE_ID=%d" %(clusternode.getClusterNodeName(), clusternode.getClusterNodeID()),
> +                        appendToFile=True, createFile=True)
> +
>              # Going to sleep for 2 seconds, so that TIMESTAMP should be in the
>              # past in the logs so that capturing sysrq data will be guaranteed.
>              time.sleep(2)




^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2013-02-05 13:23 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-01-31 14:41 [Cluster-devel] [PATCH] gfs2_lockcapture: Capture the status of the cluster nodes and find the clusternode name and id sbradley
2013-02-05 13:23 ` Steven Whitehouse

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).