From mboxrd@z Thu Jan 1 00:00:00 1970
From: rmccabe@sourceware.org
Date: 14 Dec 2006 17:03:00 -0000
Subject: [Cluster-devel] conga/luci/site/luci/Extensions cluster_adapte ...
Message-ID: <20061214170300.32387.qmail@sourceware.org>
List-Id:
To: cluster-devel.redhat.com
MIME-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit

CVSROOT:	/cvs/cluster
Module name:	conga
Changes by:	rmccabe at sourceware.org	2006-12-14 17:02:57

Modified files:
	luci/site/luci/Extensions: cluster_adapters.py

Log message:
	fix for the case in delete cluster where we delete the last remaining node

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/cluster_adapters.py.diff?cvsroot=cluster&r1=1.180&r2=1.181

--- conga/luci/site/luci/Extensions/cluster_adapters.py	2006/12/11 22:42:34	1.180
+++ conga/luci/site/luci/Extensions/cluster_adapters.py	2006/12/14 17:02:56	1.181
@@ -26,7 +26,7 @@
 from clusterOS import resolveOSType
 from FenceHandler import FenceHandler, FENCE_OPTS
 from GeneralError import GeneralError
-from homebase_adapters import nodeUnauth, nodeAuth, manageCluster, createClusterSystems, havePermCreateCluster, setNodeFlag, delNodeFlag, userAuthenticated, getStorageNode, getClusterNode
+from homebase_adapters import nodeUnauth, nodeAuth, manageCluster, createClusterSystems, havePermCreateCluster, setNodeFlag, delNodeFlag, userAuthenticated, getStorageNode, getClusterNode, delCluster
 from LuciSyslog import LuciSyslog
 
 #Policy for showing the cluster chooser menu:
@@ -2807,14 +2807,20 @@
 		try:
 			rc = RicciCommunicator(nodename_resolved)
 		except Exception, e:
-			luci_log.debug_verbose('[%d] CStop0: RC %s: %s' \
+			luci_log.debug_verbose('CStop0: [%d] RC %s: %s' \
 				% (delete, nodename_resolved, str(e)))
 			errors += 1
 			continue
-		if nodeLeave(self, rc, clustername, nodename_resolved) is None:
-			luci_log.debug_verbose('[%d] CStop1: nodeLeave %s' \
-				% (delete, nodename_resolved))
-			errors += 1
+
+		if delete is True:
+			if nodeDelete(self, rc, model, clustername, nodename, nodename_resolved) is None:
+				luci_log.debug_verbose('CStop1: nodeDelete failed')
+				errors += 1
+		else:
+			if nodeLeave(self, rc, clustername, nodename_resolved) is None:
+				luci_log.debug_verbose('CStop2: [%d] nodeLeave %s' \
+					% (delete, nodename_resolved))
+				errors += 1
 	return errors
 
 def clusterRestart(self, model):
@@ -2827,7 +2833,18 @@
 	return snum_err + jnum_err
 
 def clusterDelete(self, model):
-	return clusterStop(self, model, delete=True)
+	if clusterStop(self, model, delete=True) < 1:
+		try:
+			clustername = model.getClusterName()
+		except Exception, e:
+			luci_log.debug_verbose('clusterDelete0: unable to get cluster name')
+			return None
+
+		try:
+			delCluster(self, clustername)
+		except Exception, e:
+			luci_log.debug_verbose('clusterDelete0: %s: %s' \
+				% (clustername, str(e)))
 
 def forceNodeReboot(self, rc, clustername, nodename_resolved):
 	batch_number, result = nodeReboot(rc)
@@ -2908,75 +2925,89 @@
 	return True
 
 def nodeDelete(self, rc, model, clustername, nodename, nodename_resolved):
-	#We need to get a node name other than the node
-	#to be deleted, then delete the node from the cluster.conf
-	#and propogate it. We will need two ricci agents for this task.
+	# We need to get a node name other than the node
+	# to be deleted, then delete the node from the cluster.conf
+	# and propagate it. We will need two ricci agents for this task,
+	# unless we are deleting the last remaining cluster node.
+
+	if len(model.getNodes()) == 1:
+		# If this is the last cluster node, we don't need a second
+		# node to propagate a new cluster.conf file. We need only to
+		# stop the final node and delete its cluster.conf file.
+		last_node = True
+	else:
+		# Make sure we can find a second node before we hose anything.
 
-	# Make sure we can find a second node before we hose anything.
-	path = str(CLUSTER_FOLDER_PATH + clustername)
-	try:
-		clusterfolder = self.restrictedTraverse(path)
-		if not clusterfolder:
-			raise Exception, 'no cluster folder at %s' % path
-	except Exception, e:
-		luci_log.debug_verbose('ND0: node delete error for cluster %s: %s' \
-			% (clustername, str(e)))
-		return None
+		last_node = False
+		found_one = False
 
-	try:
-		nodes = clusterfolder.objectItems('Folder')
-		if not nodes or len(nodes) < 1:
-			raise Exception, 'no cluster nodes in DB'
-	except Exception, e:
-		luci_log.debug_verbose('ND1: node delete error for cluster %s: %s' \
-			% (clustername, str(e)))
+		path = str(CLUSTER_FOLDER_PATH + clustername)
 
-	found_one = False
-	for node in nodes:
-		if node[1].getId().find(nodename) != (-1):
-			continue
-		#here we make certain the node is up...
-		# XXX- we should also make certain this host is still
-		# in the cluster we believe it is.
 		try:
-			rc2 = RicciCommunicator(node[1].getId())
+			clusterfolder = self.restrictedTraverse(path)
+			if not clusterfolder:
+				raise Exception, 'no cluster folder@%s' % path
 		except Exception, e:
-			luci_log.info('ND2: ricci %s error: %s' % (node[0], str(e)))
-			continue
+			luci_log.debug_verbose('ND0: node delete error for cluster %s: %s' \
+				% (clustername, str(e)))
+			return None
 
-		if not rc2.authed():
-			try:
-				setNodeFlag(node[1], CLUSTER_NODE_NEED_AUTH)
-			except:
-				pass
+		try:
+			nodes = clusterfolder.objectItems('Folder')
+			if not nodes or len(nodes) < 1:
+				raise Exception, 'no cluster nodes in DB'
+		except Exception, e:
+			luci_log.debug_verbose('ND1: node delete error for cluster %s: %s' \
+				% (clustername, str(e)))
+
+		for node in nodes:
+			if node[1].getId().find(nodename) != (-1):
+				continue
+			# here we make certain the node is up...
+			# XXX- we should also make certain this host is still
+			# in the cluster we believe it is.
 			try:
-				snode = getStorageNode(self, node[0])
-				setNodeFlag(snode, CLUSTER_NODE_NEED_AUTH)
-			except:
-				pass
+				rc2 = RicciCommunicator(node[1].getId())
+				if not rc2:
+					raise Exception, 'ND1a: rc2 is None'
+			except Exception, e:
+				luci_log.info('ND2: ricci %s error: %s' % (node[0], str(e)))
+				continue
 
-			luci_log.debug_verbose('ND3: %s is not authed' % node[0])
-			rc2 = None
-			continue
-		else:
-			found_one = True
-			break
+			if not rc2.authed():
+				try:
+					setNodeFlag(node[1], CLUSTER_NODE_NEED_AUTH)
+				except:
+					pass
 
-	if not found_one:
-		luci_log.debug_verbose('ND4: unable to find ricci agent to delete %s from %s' % (nodename, clustername))
-		return None
+				try:
+					snode = getStorageNode(self, node[0])
+					setNodeFlag(snode, CLUSTER_NODE_NEED_AUTH)
+				except:
+					pass
+
+				luci_log.debug_verbose('ND3: %s is not authed' % node[0])
+				rc2 = None
+				continue
+			else:
+				found_one = True
+				break
+
+		if not found_one:
+			luci_log.debug_verbose('ND4: unable to find ricci agent to delete %s from %s' % (nodename, clustername))
+			return None
 
-	#First, delete cluster.conf from node to be deleted.
-	#next, have node leave cluster.
+	# First, delete cluster.conf from node to be deleted.
+	# next, have node leave cluster.
batch_number, result = nodeLeaveCluster(rc, purge=True) if batch_number is None or result is None: luci_log.debug_verbose('ND5: batch_number and/or result is None') return None - #It is not worth flagging this node in DB, as we are going - #to delete it anyway. Now, we need to delete node from model - #and send out new cluster.conf + # It is not worth flagging this node in DB, as we are going + # to delete it anyway. Now, we need to delete node from model + # and send out new cluster.conf delete_target = None nodelist = model.getNodes() find_node = lower(nodename) @@ -2989,27 +3020,32 @@ continue if delete_target is None: - luci_log.debug_verbose('ND6: unable to find delete target for %s in %s' \ + luci_log.debug_verbose('ND6: no delete target for %s in cluster %s' \ % (nodename, clustername)) return None - model.deleteNode(delete_target) - try: - str_buf = model.exportModelAsString() - if not str_buf: - raise Exception, 'model string is blank' + model.deleteNode(delete_target) except Exception, e: - luci_log.debug_verbose('ND7: exportModelAsString: %s' % str(e)) - return None + luci_log.debug_verbose('ND6a: deleteNode %s failed: %s' \ + % (delete_target.getName(), str(e))) - # propagate the new cluster.conf via the second node - batch_number, result = setClusterConf(rc2, str(str_buf)) - if batch_number is None: - luci_log.debug_verbose('ND8: batch number is None after del node in NTP') - return None + if not last_node: + try: + str_buf = model.exportModelAsString() + if not str_buf: + raise Exception, 'model string is blank' + except Exception, e: + luci_log.debug_verbose('ND7: exportModelAsString: %s' % str(e)) + return None - #Now we need to delete the node from the DB + # propagate the new cluster.conf via the second node + batch_number, result = setClusterConf(rc2, str(str_buf)) + if batch_number is None: + luci_log.debug_verbose('ND8: batch number is None after del node in NTP') + return None + + # Now we need to delete the node from the DB path = str(CLUSTER_FOLDER_PATH + clustername) del_path = str(path + '/' + nodename_resolved) @@ -3021,10 +3057,12 @@ luci_log.debug_verbose('ND9: error deleting %s: %s' \ % (del_path, str(e))) - try: - set_node_flag(self, clustername, rc2.hostname(), str(batch_number), NODE_DELETE, "Deleting node \'%s\'" % nodename_resolved) - except Exception, e: - luci_log.debug_verbose('ND10: failed to set flags: %s' % str(e)) + if not last_node: + try: + set_node_flag(self, clustername, rc2.hostname(), str(batch_number), NODE_DELETE, "Deleting node \'%s\'" % nodename_resolved) + except Exception, e: + luci_log.debug_verbose('ND10: failed to set flags: %s' % str(e)) + return True def nodeTaskProcess(self, model, request):
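
To summarize the change: nodeDelete() now distinguishes the case where the node being removed is the only cluster member left. The sketch below condenses the control flow the patch introduces; it is illustrative only, not the committed code. The helper names mirror those in the diff (nodeLeaveCluster, setClusterConf, exportModelAsString, deleteNode, clusterStop, delCluster), but they are passed in here as callables/objects so the example stands alone, and the Zope database bookkeeping, ricci authentication checks, and logging are omitted.

def delete_node_flow(leave_cluster, set_cluster_conf, model, delete_target, last_node):
	# 1. Purge cluster.conf from the departing node and have it leave the
	#    cluster (nodeLeaveCluster(rc, purge=True) in the patch).
	batch_number, result = leave_cluster(purge=True)
	if batch_number is None or result is None:
		return None

	# 2. Drop the node from the in-memory cluster model.
	model.deleteNode(delete_target)

	# 3. Only when other nodes remain is there a second ricci agent to receive
	#    the updated cluster.conf; deleting the last remaining node skips the
	#    propagation (and the ND10 flag-setting) entirely.
	if not last_node:
		str_buf = model.exportModelAsString()
		if not str_buf:
			return None
		batch_number, result = set_cluster_conf(str(str_buf))
		if batch_number is None:
			return None

	return True

def cluster_delete_flow(stop_cluster, remove_cluster_record, clustername):
	# clusterDelete() now removes the cluster's Luci record (delCluster in the
	# patch) only after clusterStop(delete=True) has deleted every node
	# without error.
	errors = stop_cluster(delete=True)
	if errors < 1:
		remove_cluster_record(clustername)

Together these cover the failure this commit addresses: before r1.181, deleting a cluster always tried to find a second node to propagate the new cluster.conf to, which cannot succeed when the last remaining node is the one being deleted.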