From mboxrd@z Thu Jan 1 00:00:00 1970 From: rmccabe@sourceware.org Date: 9 Nov 2006 20:32:03 -0000 Subject: [Cluster-devel] conga/luci cluster/form-macros site/luci/Exten ... Message-ID: <20061109203203.11621.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: conga Changes by: rmccabe at sourceware.org 2006-11-09 20:32:02 Modified files: luci/cluster : form-macros luci/site/luci/Extensions: cluster_adapters.py conga_constants.py Log message: fix the cluster start/stop/restart/delete actions in the actions menu so they do what they're supposed to (as opposed to nothing) Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/cluster/form-macros.diff?cvsroot=cluster&r1=1.101&r2=1.102 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/cluster_adapters.py.diff?cvsroot=cluster&r1=1.156&r2=1.157 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/conga_constants.py.diff?cvsroot=cluster&r1=1.23&r2=1.24 --- conga/luci/cluster/form-macros 2006/11/07 21:33:52 1.101 +++ conga/luci/cluster/form-macros 2006/11/09 20:32:02 1.102 @@ -89,9 +89,27 @@
@@ -1068,11 +1086,9 @@
-
-
+ tal:define="result python: here.clusterTaskProcess(modelb, request)"/>
+

Cluster Process Form

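[Editorial note, not part of the commit] For context on the template hunk above: the start/stop/restart/delete entries in the actions menu appear to be plain links back to the cluster page carrying a "task" query argument, and the new here.clusterTaskProcess(modelb, request) call dispatches on that argument. Below is a minimal sketch of how those four URLs are composed; build_task_url and the example base URL are illustrative only, and the real strings are built inline in getClusterInfo() in the cluster_adapters.py diff that follows.

	# Illustrative sketch only -- mirrors the restart/stop/start/delete URL
	# lines added to getClusterInfo() in cluster_adapters.py (see diff below).
	CLUSTER_PROCESS = 'CLUSTER_PROCESS'	# placeholder; the real value is defined in conga_constants.py
	CLUSTER_RESTART = '1002'		# task constant added to conga_constants.py by this commit

	def build_task_url(baseurl, clustername, task):
		# e.g. <baseurl>?pagetype=<CLUSTER_PROCESS>&clustername=mycluster&task=1002
		return baseurl + '?pagetype=' + CLUSTER_PROCESS \
			+ '&clustername=' + clustername + '&task=' + task

	# example call; the base URL here is hypothetical
	restart_url = build_task_url('/luci/cluster/index_html', 'mycluster', CLUSTER_RESTART)
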
--- conga/luci/site/luci/Extensions/cluster_adapters.py 2006/11/09 14:17:08 1.156 +++ conga/luci/site/luci/Extensions/cluster_adapters.py 2006/11/09 20:32:02 1.157 @@ -112,7 +112,6 @@ def validateCreateCluster(self, request): errors = list() - messages = list() requestResults = {} if not havePermCreateCluster(self): @@ -234,7 +233,7 @@ buildClusterCreateFlags(self, batch_id_map, clusterName) response = request.RESPONSE - response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clusterName) + response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clusterName + '&busyfirst=true') def buildClusterCreateFlags(self, batch_map, clusterName): path = str(CLUSTER_FOLDER_PATH + clusterName) @@ -379,10 +378,11 @@ errors.append('An error occurred while attempting to add cluster node \"' + clunode['host'] + '\"') return (False, {'errors': errors, 'requestResults': cluster_properties}) - messages.append('Cluster join initiated for host \"' + clunode['host'] + '\"') - + messages.append('Cluster join initiated for host \"' + clunode['host'] + '\"') buildClusterCreateFlags(self, batch_id_map, clusterName) - return (True, {'errors': errors, 'messages': messages}) + + response = request.RESPONSE + response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clusterName + '&busyfirst=true') def validateServiceAdd(self, request): try: @@ -757,23 +757,14 @@ luci_log.debug_verbose('VCC0a: no model, no cluster name') return (False, {'errors': ['No cluster model was found.']}) - rc = getRicciAgent(self, cluname) - if not rc: - luci_log.debug_verbose('VCCb: no model in session, unable to find a ricci agent for the %s cluster' % cluname) - return (False, {'errors': ['No cluster model was found.']}) - try: - model = getModelBuilder(None, rc, rc.dom0()) - if not model: - raise Exception, 'model is none' - except Exception, e: - luci_log.debug_verbose('VCCc: unable to get model builder for cluster %s: %s' % (cluname, str(e))) + model = getModelForCluster(self, cluname) + except: model = None if model is None: luci_log.debug_verbose('VCC0: unable to get model from session') return (False, {'errors': ['No cluster model was found.']}) - try: if not 'configtype' in request.form: luci_log.debug_verbose('VCC2: no configtype') @@ -853,7 +844,7 @@ return (retcode, {'errors': errors, 'messages': messages}) response = request.RESPONSE - response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clustername) + response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clustername + '&busyfirst=true') def validateFenceAdd(self, request): return (True, {}) @@ -1419,7 +1410,7 @@ def getClusterAlias(self, model): alias = model.getClusterAlias() - if alias == None: + if alias is None: return model.getClusterName() else: return alias @@ -1652,7 +1643,7 @@ svc = modelb.retrieveServiceByName(item['name']) dom = svc.getAttribute("domain") - if dom != None: + if dom is not None: itemmap['faildom'] = dom else: itemmap['faildom'] = "No Failover Domain" @@ -1736,7 +1727,7 @@ #first get service by name from model svc = modelb.getService(servicename) resource_list = list() - if svc != None: + if svc is not None: indent_ctr = 0 children = svc.getChildren() for child in children: @@ -1751,7 +1742,7 @@ #Call yourself on every children #then return rc_map = {} - if parent != None: + if parent is not None: rc_map['parent'] = parent rc_map['name'] = child.getName() if child.isRefObject() == True: @@ -1968,11 +1959,11 
@@ fdom_map['cfgurl'] = baseurl + "?pagetype=" + FDOM_LIST + "&clustername=" + clustername ordered_attr = fdom.getAttribute('ordered') restricted_attr = fdom.getAttribute('restricted') - if ordered_attr != None and (ordered_attr == "true" or ordered_attr == "1"): + if ordered_attr is not None and (ordered_attr == "true" or ordered_attr == "1"): fdom_map['ordered'] = True else: fdom_map['ordered'] = False - if restricted_attr != None and (restricted_attr == "true" or restricted_attr == "1"): + if restricted_attr is not None and (restricted_attr == "true" or restricted_attr == "1"): fdom_map['restricted'] = True else: fdom_map['restricted'] = False @@ -1993,7 +1984,7 @@ else: nodesmap['status'] = NODE_INACTIVE priority_attr = node.getAttribute('priority') - if priority_attr != None: + if priority_attr is not None: nodesmap['priority'] = "0" nodelist.append(nodesmap) fdom_map['nodeslist'] = nodelist @@ -2006,7 +1997,7 @@ break #found more info about service... domain = svc.getAttribute("domain") - if domain != None: + if domain is not None: if domain == fdom.getName(): svcmap = {} svcmap['name'] = svcname @@ -2018,47 +2009,52 @@ fdomlist.append(fdom_map) return fdomlist -def processClusterProps(self, ricci_agent, request): - #First, retrieve cluster.conf from session - conf = request.SESSION.get('conf') - model = ModelBuilder(0, None, None, conf) - - #Next, determine actiontype and switch on it - actiontype = request[ACTIONTYPE] - - if actiontype == BASECLUSTER: - cp = model.getClusterPtr() - cfgver = cp.getConfigVersion() - - rcfgver = request['cfgver'] - - if cfgver != rcfgver: - cint = int(cfgver) - rint = int(rcfgver) - if rint > cint: - cp.setConfigVersion(rcfgver) - - rname = request['cluname'] - name = model.getClusterAlias() - - if rname != name: - cp.addAttribute('alias', rname) - - response = request.RESPONSE - response.redirect(request['HTTP_REFERER'] + "&busyfirst=true") - return +def clusterTaskProcess(self, model, request): + try: + task = request['task'] + except: + try: + task = request.form['task'] + except: + luci_log.debug_verbose('CTP1: no task specified') + task = None - elif actiontype == FENCEDAEMON: - pass + if not model: + try: + cluname = request['clustername'] + if not cluname: + raise Exception, 'cluname is blank' + except: + try: + cluname = request.form['clustername'] + if not cluname: + raise Exception, 'cluname is blank' + except: + luci_log.debug_verbose('CTP0: no model/no cluster name') + return 'Unable to determine the cluster name.' + try: + model = getModelForCluster(self, cluname) + except Exception, e: + luci_log.debug_verbose('CPT1: GMFC failed for %s' % cluname) + model = None - elif actiontype == MULTICAST: - pass + if not model: + return 'Unable to get the model object for %s' % cluname - elif actiontype == QUORUMD: - pass + if task == CLUSTER_STOP: + clusterStop(self, model) + elif task == CLUSTER_START: + clusterStart(self, model) + elif task == CLUSTER_RESTART: + clusterRestart(self, model) + elif task == CLUSTER_DELETE: + clusterStop(self, model, delete=True) + else: + return 'An unknown cluster task was requested.' - else: - return + response = request.RESPONSE + response.redirect('%s?pagetype=%s&clustername=%s&busyfirst=true' \ + % (request['URL'], CLUSTER, model.getClusterName())) def getClusterInfo(self, model, req): try: @@ -2091,7 +2087,6 @@ luci_log.debug_verbose('GCI3: unable to get model for cluster %s: %s' % cluname, str(e)) return {} - baseurl = req['URL'] + "?" 
+ PAGETYPE + "=" + CLUSTER_PROCESS + "&" + CLUNAME + "=" + cluname + "&" prop_baseurl = req['URL'] + '?' + PAGETYPE + '=' + CLUSTER_CONFIG + '&' + CLUNAME + '=' + cluname + '&' map = {} basecluster_url = prop_baseurl + PROPERTIES_TAB + "=" + PROP_GENERAL_TAB @@ -2110,10 +2105,10 @@ map['fencedaemon_url'] = fencedaemon_url fdp = model.getFenceDaemonPtr() pjd = fdp.getAttribute('post_join_delay') - if pjd == None: + if pjd is None: pjd = "6" pfd = fdp.getAttribute('post_fail_delay') - if pfd == None: + if pfd is None: pfd = "0" #post join delay map['pjd'] = pjd @@ -2152,27 +2147,27 @@ if is_quorumd: qdp = model.getQuorumdPtr() interval = qdp.getAttribute('interval') - if interval != None: + if interval is not None: map['interval'] = interval tko = qdp.getAttribute('tko') - if tko != None: + if tko is not None: map['tko'] = tko votes = qdp.getAttribute('votes') - if votes != None: + if votes is not None: map['votes'] = votes min_score = qdp.getAttribute('min_score') - if min_score != None: + if min_score is not None: map['min_score'] = min_score device = qdp.getAttribute('device') - if device != None: + if device is not None: map['device'] = device label = qdp.getAttribute('label') - if label != None: + if label is not None: map['label'] = label heuristic_kids = qdp.getChildren() @@ -2180,24 +2175,24 @@ for kid in heuristic_kids: hmap = {} hname = kid.getAttribute('name') - if hname == None: + if hname is None: hname = h_ctr h_ctr = h_ctr + 1 hprog = kid.getAttribute('program') hscore = kid.getAttribute('score') hinterval = kid.getAttribute('interval') - if hprog == None: + if hprog is None: continue - if hname != None: + if hname is not None: hmap['hname'] = hname else: hmap['hname'] = "" hmap['hprog'] = hprog - if hscore != None: + if hscore is not None: hmap['hscore'] = hscore else: hmap['hscore'] = "" - if hinterval != None: + if hinterval is not None: hmap['hinterval'] = hinterval else: hmap['hinterval'] = "" @@ -2239,6 +2234,12 @@ map['votes'] = clu['votes'] map['minquorum'] = clu['minQuorum'] map['clucfg'] = baseurl + "?" + PAGETYPE + "=" + CLUSTER_CONFIG + "&" + CLUNAME + "=" + clustername + + map['restart_url'] = baseurl + "?" + PAGETYPE + "=" + CLUSTER_PROCESS + "&" + CLUNAME + "=" + clustername + '&task=' + CLUSTER_RESTART + map['stop_url'] = baseurl + "?" + PAGETYPE + "=" + CLUSTER_PROCESS + "&" + CLUNAME + "=" + clustername + '&task=' + CLUSTER_STOP + map['start_url'] = baseurl + "?" + PAGETYPE + "=" + CLUSTER_PROCESS + "&" + CLUNAME + "=" + clustername + '&task=' + CLUSTER_START + map['delete_url'] = baseurl + "?" 
+ PAGETYPE + "=" + CLUSTER_PROCESS + "&" + CLUNAME + "=" + clustername + '&task=' + CLUSTER_DELETE + svc_dict_list = list() for svc in svclist: svc_dict = {} @@ -2270,6 +2271,317 @@ return map +def nodeLeave(self, rc, clustername, nodename_resolved): + path = str(CLUSTER_FOLDER_PATH + clustername + '/' + nodename_resolved) + + try: + nodefolder = self.restrictedTraverse(path) + if not nodefolder: + raise Exception, 'cannot find database object at %s' % path + except Exception, e: + luci_log.debug('NLO: node_leave_cluster err: %s' % str(e)) + return None + + objname = str(nodename_resolved + "____flag") + fnpresent = noNodeFlagsPresent(self, nodefolder, objname, nodename_resolved) + + if fnpresent is None: + luci_log.debug('NL1: An error occurred while checking flags for %s' \ + % nodename_resolved) + return None + + if fnpresent == False: + luci_log.debug('NL2: flags are still present for %s -- bailing out' \ + % nodename_resolved) + return None + + batch_number, result = nodeLeaveCluster(rc) + if batch_number is None or result is None: + luci_log.debug_verbose('NL3: nodeLeaveCluster error: batch_number and/or result is None') + return None + + try: + set_node_flag(self, clustername, rc.hostname(), batch_number, NODE_LEAVE_CLUSTER, "Node \'%s\' leaving cluster" % nodename_resolved) + except Exception, e: + luci_log.debug_verbose('NL4: failed to set flags: %s' % str(e)) + return True + +def nodeJoin(self, rc, clustername, nodename_resolved): + batch_number, result = nodeJoinCluster(rc) + if batch_number is None or result is None: + luci_log.debug_verbose('NJ0: batch_number and/or result is None') + return None + + try: + set_node_flag(self, clustername, rc.hostname(), batch_number, NODE_JOIN_CLUSTER, "Node \'%s\' joining cluster" % nodename_resolved) + except Exception, e: + luci_log.debug_verbose('NJ1: failed to set flags: %s' % str(e)) + return True + +def clusterStart(self, model): + if model is None: + return None + + clustername = model.getClusterName() + nodes = model.getNodes() + if not nodes or len(nodes) < 1: + return None + + errors = 0 + for node in nodes: + nodename = node.getName().strip() + nodename_resolved = resolve_nodename(self, clustername, nodename) + + try: + rc = RicciCommunicator(nodename_resolved) + except Exception, e: + luci_log.debug_verbose('CStart: RC %s: %s' \ + % (nodename_resolved, str(e))) + errors += 1 + continue + if nodeJoin(self, rc, clustername, nodename_resolved) is None: + luci_log.debug_verbose('CStart1: nodeLeave %s' % nodename_resolved) + errors += 1 + + return errors + +def clusterStop(self, model, delete=False): + if model is None: + return None + + clustername = model.getClusterName() + nodes = model.getNodes() + if not nodes or len(nodes) < 1: + return None + + errors = 0 + for node in nodes: + nodename = node.getName().strip() + nodename_resolved = resolve_nodename(self, clustername, nodename) + + try: + rc = RicciCommunicator(nodename_resolved) + except Exception, e: + luci_log.debug_verbose('[%d] CStop0: RC %s: %s' \ + % (delete, nodename_resolved, str(e))) + errors += 1 + continue + if nodeLeave(self, rc, clustername, nodename_resolved) is None: + luci_log.debug_verbose('[%d] CStop1: nodeLeave %s' \ + % (delete, nodename_resolved)) + errors += 1 + return errors + +def clusterRestart(self, model): + snum_err = clusterStop(self, model) + if snum_err: + luci_log.debug_verbose('cluRestart0: clusterStop: %d errs' % snum_err) + jnum_err = clusterStart(self, model) + if jnum_err: + luci_log.debug_verbose('cluRestart0: clusterStart: %d errs' % 
jnum_err) + return snum_err + jnum_err + +def clusterDelete(self, model): + return clusterStop(self, model, delete=True) + +def forceNodeReboot(self, rc, clustername, nodename_resolved): + batch_number, result = nodeReboot(rc) + if batch_number is None or result is None: + luci_log.debug_verbose('FNR0: batch_number and/or result is None') + return None + + try: + set_node_flag(self, clustername, rc.hostname(), batch_number, NODE_REBOOT, "Node \'%s\' is being rebooted" % nodename_resolved) + except Exception, e: + luci_log.debug_verbose('FNR1: failed to set flags: %s' % str(e)) + return True + +def forceNodeFence(self, clustername, nodename, nodename_resolved): + path = str(CLUSTER_FOLDER_PATH + clustername) + + try: + clusterfolder = self.restrictedTraverse(path) + if not clusterfolder: + raise Exception, 'no cluster folder at %s' % path + except Exception, e: + luci_log.debug('FNF0: The cluster folder %s could not be found: %s' \ + % (clustername, str(e))) + return None + + try: + nodes = clusterfolder.objectItems('Folder') + if not nodes or len(nodes) < 1: + raise Exception, 'no cluster nodes' + except Exception, e: + luci_log.debug('FNF1: No cluster nodes for %s were found: %s' \ + % (clustername, str(e))) + return None + + found_one = False + for node in nodes: + if node[1].getId().find(nodename) != (-1): + continue + + try: + rc = RicciCommunicator(node[1].getId()) + if not rc: + raise Exception, 'rc is None' + except Exception, e: + luci_log.debug('FNF2: ricci error for host %s: %s' \ + % (node[0], str(e))) + continue + + if not rc.authed(): + rc = None + try: + snode = getStorageNode(self, node[1].getId()) + setNodeFlag(snode, CLUSTER_NODE_NEED_AUTH) + except: + pass + + try: + setNodeFlag(node[1], CLUSTER_NODE_NEED_AUTH) + except: + pass + + continue + found_one = True + break + + if not found_one: + return None + + batch_number, result = nodeFence(rc, nodename) + if batch_number is None or result is None: + luci_log.debug_verbose('FNF3: batch_number and/or result is None') + return None + + try: + set_node_flag(self, clustername, rc.hostname(), batch_number, NODE_FENCE, "Node \'%s\' is being fenced" % nodename_resolved) + except Exception, e: + luci_log.debug_verbose('FNF4: failed to set flags: %s' % str(e)) + return True + +def nodeDelete(self, rc, model, clustername, nodename, nodename_resolved): + #We need to get a node name other than the node + #to be deleted, then delete the node from the cluster.conf + #and propogate it. We will need two ricci agents for this task. + + # Make sure we can find a second node before we hose anything. + path = str(CLUSTER_FOLDER_PATH + clustername) + try: + clusterfolder = self.restrictedTraverse(path) + if not clusterfolder: + raise Exception, 'no cluster folder at %s' % path + except Exception, e: + luci_log.debug_verbose('ND0: node delete error for cluster %s: %s' \ + % (clustername, str(e))) + return None + + try: + nodes = clusterfolder.objectItems('Folder') + if not nodes or len(nodes) < 1: + raise Exception, 'no cluster nodes in DB' + except Exception, e: + luci_log.debug_verbose('ND1: node delete error for cluster %s: %s' \ + % (clustername, str(e))) + + found_one = False + for node in nodes: + if node[1].getId().find(nodename) != (-1): + continue + #here we make certain the node is up... + # XXX- we should also make certain this host is still + # in the cluster we believe it is. 
+ try: + rc2 = RicciCommunicator(node[1].getId()) + except Exception, e: + luci_log.info('ND2: ricci %s error: %s' % (node[0], str(e))) + continue + + if not rc2.authed(): + try: + setNodeFlag(node[1], CLUSTER_NODE_NEED_AUTH) + except: + pass + + try: + snode = getStorageNode(self, node[0]) + setNodeFlag(snode, CLUSTER_NODE_NEED_AUTH) + except: + pass + + luci_log.debug_verbose('ND3: %s is not authed' % node[0]) + rc2 = None + continue + else: + found_one = True + break + + if not found_one: + luci_log.debug_verbose('ND4: unable to find ricci agent to delete %s from %s' % (nodename, clustername)) + return None + + #First, delete cluster.conf from node to be deleted. + #next, have node leave cluster. + batch_number, result = nodeLeaveCluster(rc, purge=True) + if batch_number is None or result is None: + luci_log.debug_verbose('ND5: batch_number and/or result is None') + return None + + #It is not worth flagging this node in DB, as we are going + #to delete it anyway. Now, we need to delete node from model + #and send out new cluster.conf + delete_target = None + nodelist = model.getNodes() + find_node = lower(nodename) + for n in nodelist: + try: + if lower(n.getName()) == find_node: + delete_target = n + break + except: + continue + + if delete_target is None: + luci_log.debug_verbose('ND6: unable to find delete target for %s in %s' \ + % (nodename, clustername)) + return None + + model.deleteNode(delete_target) + + try: + str_buf = model.exportModelAsString() + if not str_buf: + raise Exception, 'model string is blank' + except Exception, e: + luci_log.debug_verbose('ND7: exportModelAsString: %s' % str(e)) + return None + + # propagate the new cluster.conf via the second node + batch_number, result = setClusterConf(rc2, str(str_buf)) + if batch_number is None: + luci_log.debug_verbose('ND8: batch number is None after del node in NTP') + return None + + #Now we need to delete the node from the DB + path = str(CLUSTER_FOLDER_PATH + clustername) + del_path = str(path + '/' + nodename_resolved) + + try: + delnode = self.restrictedTraverse(del_path) + clusterfolder = self.restrictedTraverse(path) + clusterfolder.manage_delObjects(delnode[0]) + except Exception, e: + luci_log.debug_verbose('ND9: error deleting %s: %s' \ + % (del_path, str(e))) + + try: + set_node_flag(self, clustername, rc2.hostname(), batch_number, NODE_DELETE, "Deleting node \'%s\'" % nodename_resolved) + except Exception, e: + luci_log.debug_verbose('ND10: failed to set flags: %s' % str(e)) + return True + def nodeTaskProcess(self, model, request): try: clustername = request['clustername'] @@ -2345,312 +2657,41 @@ return None if task == NODE_LEAVE_CLUSTER: - path = str(CLUSTER_FOLDER_PATH + clustername + "/" + nodename_resolved) - - try: - nodefolder = self.restrictedTraverse(path) - if not nodefolder: - raise Exception, 'cannot find directory at %s' % path - except Exception, e: - luci_log.debug('node_leave_cluster err: %s' % str(e)) - return None - - objname = str(nodename_resolved + "____flag") - - fnpresent = noNodeFlagsPresent(self, nodefolder, objname, nodename_resolved) - if fnpresent is None: - luci_log.debug('An error occurred while checking flags for %s' \ - % nodename_resolved) + if nodeLeave(self, rc, clustername, nodename_resolved) is None: + luci_log.debug_verbose('NTP: nodeLeave failed') return None - if fnpresent == False: - luci_log.debug('flags are still present for %s -- bailing out' \ - % nodename_resolved) - return None - - batch_number, result = nodeLeaveCluster(rc) - if batch_number is None or result 
is None: - luci_log.debug_verbose('nodeLeaveCluster error: batch_number and/or result is None') - return None - - batch_id = str(batch_number) - objpath = str(path + "/" + objname) - - try: - nodefolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname) - #Now we need to annotate the new DB object - flag = self.restrictedTraverse(objpath) - flag.manage_addProperty(BATCH_ID, batch_id, "string") - flag.manage_addProperty(TASKTYPE, NODE_LEAVE_CLUSTER, "string") - flag.manage_addProperty(FLAG_DESC, "Node \'" + nodename + "\' leaving cluster", "string") - except: - luci_log.debug('An error occurred while setting flag %s' % objpath) - - response = request.RESPONSE #Is this correct? Should we re-direct to the cluster page? + response = request.RESPONSE response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clustername) elif task == NODE_JOIN_CLUSTER: - batch_number, result = nodeJoinCluster(rc) - if batch_number is None or result is None: - luci_log.debug_verbose('nodeJoin error: batch_number and/or result is None') + if nodeJoin(self, rc, clustername, nodename_resolved) is None: + luci_log.debug_verbose('NTP: nodeJoin failed') return None - path = str(CLUSTER_FOLDER_PATH + clustername + "/" + nodename_resolved) - batch_id = str(batch_number) - objname = str(nodename_resolved + "____flag") - objpath = str(path + "/" + objname) - - try: - nodefolder = self.restrictedTraverse(path) - nodefolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname) - #Now we need to annotate the new DB object - flag = self.restrictedTraverse(objpath) - flag.manage_addProperty(BATCH_ID, batch_id, "string") - flag.manage_addProperty(TASKTYPE, NODE_JOIN_CLUSTER, "string") - flag.manage_addProperty(FLAG_DESC, "Node \'" + nodename + "\' joining cluster", "string") - except Exception, e: - luci_log.debug_verbose('nodeJoin error: creating flags at %s: %s' \ - % (path, str(e))) - - response = request.RESPONSE #Once again, is this correct? Should we re-direct to the cluster page? + response = request.RESPONSE response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clustername) elif task == NODE_REBOOT: - batch_number, result = nodeReboot(rc) - if batch_number is None or result is None: - luci_log.debug_verbose('nodeReboot: batch_number and/or result is None') + if forceNodeReboot(self, rc, clustername, nodename_resolved) is None: + luci_log.debug_verbose('NTP: nodeReboot failed') return None - path = str(CLUSTER_FOLDER_PATH + clustername + "/" + nodename_resolved) - batch_id = str(batch_number) - objname = str(nodename_resolved + "____flag") - objpath = str(path + "/" + objname) - - try: - nodefolder = self.restrictedTraverse(path) - nodefolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname) - #Now we need to annotate the new DB object - flag = self.restrictedTraverse(objpath) - flag.manage_addProperty(BATCH_ID, batch_id, "string") - flag.manage_addProperty(TASKTYPE, NODE_REBOOT, "string") - flag.manage_addProperty(FLAG_DESC, "Node \'" + nodename + "\' is being rebooted", "string") - except Exception, e: - luci_log.debug_verbose('nodeReboot err: creating flags at %s: %s' \ - % (path, str(e))) - - response = request.RESPONSE #Once again, is this correct? Should we re-direct to the cluster page? + response = request.RESPONSE response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clustername) elif task == NODE_FENCE: - #here, we DON'T want to open connection to node to be fenced. 
- path = str(CLUSTER_FOLDER_PATH + clustername) - try: - clusterfolder = self.restrictedTraverse(path) - if not clusterfolder: - raise Exception, 'no cluster folder at %s' % path - except Exception, e: - luci_log.debug('The cluster folder for %s could not be found: %s' \ - % (clustername, str(e))) + if forceNodeFence(self, clustername, nodename, nodename_resolved) is None: + luci_log.debug_verbose('NTP: nodeFencefailed') return None - try: - nodes = clusterfolder.objectItems('Folder') - if not nodes or len(nodes) < 1: - raise Exception, 'no cluster nodes' - except Exception, e: - luci_log.debug('No cluster nodes for %s were found: %s' \ - % (clustername, str(e))) - return None - - found_one = False - for node in nodes: - if node[1].getId().find(nodename) != (-1): - continue - - try: - rc = RicciCommunicator(node[1].getId()) - if not rc: - raise Exception, 'rc is None' - except Exception, e: - luci_log.debug('ricci error for host %s: %s' \ - % (node[0], str(e))) - continue - - if not rc.authed(): - rc = None - try: - snode = getStorageNode(self, node[1].getId()) - setNodeFlag(snode, CLUSTER_NODE_NEED_AUTH) - except: - pass - - try: - setNodeFlag(node[1], CLUSTER_NODE_NEED_AUTH) - except: - pass - - continue - found_one = True - break - - if not found_one: - return None - - batch_number, result = nodeFence(rc, nodename) - if batch_number is None or result is None: - luci_log.debug_verbose('nodeFence: batch_number and/or result is None') - return None - - path = str(path + "/" + nodename_resolved) - batch_id = str(batch_number) - objname = str(nodename_resolved + "____flag") - objpath = str(path + "/" + objname) - - try: - nodefolder = self.restrictedTraverse(path) - nodefolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname) - #Now we need to annotate the new DB object - flag = self.restrictedTraverse(objpath) - flag.manage_addProperty(BATCH_ID, batch_id, "string") - flag.manage_addProperty(TASKTYPE, NODE_FENCE, "string") - flag.manage_addProperty(FLAG_DESC, "Node \'" + nodename + "\' is being fenced", "string") - except Exception, e: - luci_log.debug_verbose('nodeFence err: creating flags at %s: %s' \ - % (path, str(e))) - - response = request.RESPONSE #Once again, is this correct? Should we re-direct to the cluster page? + response = request.RESPONSE response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clustername) elif task == NODE_DELETE: - #We need to get a node name other than the node - #to be deleted, then delete the node from the cluster.conf - #and propogate it. We will need two ricci agents for this task. - - # Make sure we can find a second node before we hose anything. - path = str(CLUSTER_FOLDER_PATH + clustername) - try: - clusterfolder = self.restrictedTraverse(path) - if not clusterfolder: - raise Exception, 'no cluster folder at %s' % path - except Exception, e: - luci_log.debug_verbose('node delete error for cluster %s: %s' \ - % (clustername, str(e))) - return None - - try: - nodes = clusterfolder.objectItems('Folder') - if not nodes or len(nodes) < 1: - raise Exception, 'no cluster nodes in DB' - except Exception, e: - luci_log.debug_verbose('node delete error for cluster %s: %s' \ - % (clustername, str(e))) - - found_one = False - for node in nodes: - if node[1].getId().find(nodename) != (-1): - continue - #here we make certain the node is up... - # XXX- we should also make certain this host is still - # in the cluster we believe it is. 
- try: - rc2 = RicciCommunicator(node[1].getId()) - except Exception, e: - luci_log.info('ricci %s error: %s' % (node[0], str(e))) - continue - except: - continue - - if not rc2.authed(): - try: - setNodeFlag(node[1], CLUSTER_NODE_NEED_AUTH) - except: - pass - - try: - snode = getStorageNode(self, node[0]) - setNodeFlag(snode, CLUSTER_NODE_NEED_AUTH) - except: - pass - - luci_log.debug_verbose('%s is not authed' % node[0]) - rc2 = None - continue - else: - found_one = True - break - - if not found_one: - luci_log.debug_verbose('unable to find ricci node to delete %s from %s' % (nodename, clustername)) + if nodeDelete(self, rc, model, clustername, nodename, nodename_resolved) is None: + luci_log.debug_verbose('NTP: nodeDelete failed') return None - - #First, delete cluster.conf from node to be deleted. - #next, have node leave cluster. - batch_number, result = nodeLeaveCluster(rc, purge=True) - if batch_number is None or result is None: - luci_log.debug_verbose('nodeDelete: batch_number and/or result is None') - return None - - #It is not worth flagging this node in DB, as we are going - #to delete it anyway. Now, we need to delete node from model - #and send out new cluster.conf - delete_target = None - nodelist = model.getNodes() - find_node = lower(nodename) - for n in nodelist: - try: - if lower(n.getName()) == find_node: - delete_target = n - break - except: - continue - - if delete_target is None: - luci_log.debug_verbose('unable to find delete target for %s in %s' \ - % (nodename, clustername)) - return None - - model.deleteNode(delete_target) - - try: - str_buf = model.exportModelAsString() - if not str_buf: - raise Exception, 'model string is blank' - except Exception, e: - luci_log.debug_verbose('NTP exportModelAsString: %s' % str(e)) - return None - - # propagate the new cluster.conf via the second node - batch_number, result = setClusterConf(rc2, str(str_buf)) - if batch_number is None: - luci_log.debug_verbose('batch number is None after del node in NTP') - return None - - #Now we need to delete the node from the DB - path = str(CLUSTER_FOLDER_PATH + clustername) - del_path = str(path + "/" + nodename_resolved) - - try: - delnode = self.restrictedTraverse(del_path) - clusterfolder = self.restrictedTraverse(path) - clusterfolder.manage_delObjects(delnode[0]) - except Exception, e: - luci_log.debug_verbose('error deleting %s: %s' % (del_path, str(e))) - - batch_id = str(batch_number) - objname = str(nodename_resolved + "____flag") - objpath = str(path + "/" + objname) - - try: - clusterfolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname) - #Now we need to annotate the new DB object - flag = self.restrictedTraverse(objpath) - flag.manage_addProperty(BATCH_ID, batch_id, "string") - flag.manage_addProperty(TASKTYPE, NODE_DELETE, "string") - flag.manage_addProperty(FLAG_DESC, "Deleting node \'" + nodename + "\'", "string") - except Exception, e: - luci_log.debug_verbose('nodeDelete %s err setting flag at %s: %s' \ - % (nodename, objpath, str(e))) - response = request.RESPONSE response.redirect(request['HTTP_REFERER'] + "&busyfirst=true") @@ -2951,7 +2992,8 @@ except: fd = None #Set to None in case last time thru loop continue - if fd != None: + + if fd is not None: if fd.isShared() == False: #Not a shared dev...build struct and add fencedev = {} fencedev['prettyname'] = FENCE_OPTS[fd.getAgentType()] @@ -2974,7 +3016,7 @@ last_kid_fd = None level1.append(fencedev) else: #This dev is shared - if (last_kid_fd != None) and (fd.getName().strip() == 
last_kid_fd.getName().strip()): #just append a new instance struct to last_kid_fd + if (last_kid_fd is not None) and (fd.getName().strip() == last_kid_fd.getName().strip()): #just append a new instance struct to last_kid_fd instance_struct = {} instance_struct['id'] = str(minor_num) minor_num = minor_num + 1 @@ -3045,7 +3087,7 @@ except: fd = None #Set to None in case last time thru loop continue - if fd != None: + if fd is not None: if fd.isShared() == False: #Not a shared dev...build struct and add fencedev = {} fencedev['prettyname'] = FENCE_OPTS[fd.getAgentType()] @@ -3068,7 +3110,7 @@ last_kid_fd = None level2.append(fencedev) else: #This dev is shared - if (last_kid_fd != None) and (fd.getName().strip() == last_kid_fd.getName().strip()): #just append a new instance struct to last_kid_fd + if (last_kid_fd is not None) and (fd.getName().strip() == last_kid_fd.getName().strip()): #just append a new instance struct to last_kid_fd instance_struct = {} instance_struct['id'] = str(minor_num) minor_num = minor_num + 1 @@ -3584,7 +3626,7 @@ def getResourceInfo(modelb, request): if not modelb: - luci_log.debug_verbose('no modelb obj in getResourceInfo') + luci_log.debug_verbose('GRI0: no modelb object in session') return {} name = None @@ -4539,6 +4581,24 @@ modelb.setIsVirtualized(isVirtualized) return modelb +def getModelForCluster(self, clustername): + rc = getRicciAgent(self, clustername) + if not rc: + luci_log.debug_verbose('GMFC0: unable to find a ricci agent for %s' \ + % clustername) + return None + + try: + model = getModelBuilder(None, rc, rc.dom0()) + if not model: + raise Exception, 'model is none' + except Exception, e: + luci_log.debug_verbose('GMFC1: unable to get model builder for %s: %s' \ + % (clustername, str(e))) + return None + + return model + def set_node_flag(self, cluname, agent, batchid, task, desc): path = str(CLUSTER_FOLDER_PATH + cluname) batch_id = str(batchid) @@ -4551,7 +4611,7 @@ flag = self.restrictedTraverse(objpath) flag.manage_addProperty(BATCH_ID, batch_id, 'string') flag.manage_addProperty(TASKTYPE, task, 'string') - flag.manage_addProperty(FLAG_DESC, desc) + flag.manage_addProperty(FLAG_DESC, desc, 'string') except Exception, e: errmsg = 'Error creating flag (%s,%s,%s)@%s: %s' \ % (batch_id, task, desc, objpath, str(e)) --- conga/luci/site/luci/Extensions/conga_constants.py 2006/11/06 23:55:23 1.23 +++ conga/luci/site/luci/Extensions/conga_constants.py 2006/11/09 20:32:02 1.24 @@ -43,6 +43,12 @@ FENCEDEV_CONFIG="53" FENCEDEV="54" +#Cluster tasks +CLUSTER_STOP = '1000' +CLUSTER_START = '1001' +CLUSTER_RESTART = '1002' +CLUSTER_DELETE = '1003' + #General tasks NODE_LEAVE_CLUSTER="100" NODE_JOIN_CLUSTER="101"
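
[Editorial note, not part of the commit] As a usage note for the new task constants above: clusterTaskProcess() in cluster_adapters.py reads the task argument from the request, runs the matching cluster-wide operation, and then redirects back to the cluster page with busyfirst=true. The sketch below restates that if/elif chain as a lookup table; the handler bodies are stubs standing in for the real clusterStop/clusterStart/clusterRestart shown in the diff, so this is an illustration of the dispatch, not the committed code.

	# Sketch of the dispatch performed by clusterTaskProcess(); the constant
	# values are the ones added to conga_constants.py above, handlers are stubs.
	CLUSTER_STOP = '1000'
	CLUSTER_START = '1001'
	CLUSTER_RESTART = '1002'
	CLUSTER_DELETE = '1003'

	def clusterStart(self, model):
		# stub -- the real version joins every cluster node via its ricci agent
		return 0

	def clusterStop(self, model, delete=False):
		# stub -- the real version makes every node leave the cluster
		return 0

	def clusterRestart(self, model):
		# stub -- the real version is clusterStop() followed by clusterStart()
		return 0

	def dispatch_cluster_task(self, model, task):
		handlers = {
			CLUSTER_STOP: lambda: clusterStop(self, model),
			CLUSTER_START: lambda: clusterStart(self, model),
			CLUSTER_RESTART: lambda: clusterRestart(self, model),
			CLUSTER_DELETE: lambda: clusterStop(self, model, delete=True),
		}
		if task not in handlers:
			return 'An unknown cluster task was requested.'
		return handlers[task]()

	# example: dispatch_cluster_task(self, model, CLUSTER_RESTART)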