From: rmccabe@sourceware.org <rmccabe@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] conga/luci cluster/form-macros site/luci/Exten ...
Date: 9 Nov 2006 20:32:03 -0000 [thread overview]
Message-ID: <20061109203203.11621.qmail@sourceware.org> (raw)
CVSROOT: /cvs/cluster
Module name: conga
Changes by: rmccabe@sourceware.org 2006-11-09 20:32:02
Modified files:
    luci/cluster             : form-macros
    luci/site/luci/Extensions: cluster_adapters.py
                               conga_constants.py
Log message:
fix the cluster start/stop/restart/delete actions in the actions menu so they do what they're supposed to (as opposed to nothing)
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/cluster/form-macros.diff?cvsroot=cluster&r1=1.101&r2=1.102
http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/cluster_adapters.py.diff?cvsroot=cluster&r1=1.156&r2=1.157
http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/conga_constants.py.diff?cvsroot=cluster&r1=1.23&r2=1.24
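
For anyone skimming the large diff below, the shape of the fix is: getClusterInfo() now adds start/stop/restart/delete URLs (each carrying a 'task' query argument) to the map it returns, the form-macros dropdown points its option values at those URLs via tal:attributes, and a new clusterTaskProcess() routes the requested task to the new clusterStart/clusterStop/clusterRestart helpers, with delete handled as a stop with delete=True. What follows is a minimal, self-contained sketch of that dispatch pattern only -- it is not the Luci code itself, and the base URL, pagetype value, and handler bodies are placeholders:

# Illustrative sketch of the task dispatch added by this patch; the task
# constants match the new values in conga_constants.py, everything else
# here is a stand-in.

CLUSTER_STOP = '1000'
CLUSTER_START = '1001'
CLUSTER_RESTART = '1002'
CLUSTER_DELETE = '1003'

def build_action_urls(baseurl, pagetype, clustername):
    # Analogous to the start_url/stop_url/restart_url/delete_url entries
    # added to the map returned by getClusterInfo(); 'pagetype' stands in
    # for the real CLUSTER_PROCESS constant.
    prefix = '%s?pagetype=%s&clustername=%s&task=' % (baseurl, pagetype, clustername)
    return {
        'start_url':   prefix + CLUSTER_START,
        'stop_url':    prefix + CLUSTER_STOP,
        'restart_url': prefix + CLUSTER_RESTART,
        'delete_url':  prefix + CLUSTER_DELETE,
    }

def cluster_start(model):
    return 'start %s' % model                      # placeholder for clusterStart()

def cluster_stop(model, delete=False):
    return 'stop %s delete=%s' % (model, delete)   # placeholder for clusterStop()

def cluster_restart(model):
    return 'restart %s' % model                    # placeholder for clusterRestart()

def cluster_task_process(model, task):
    # Same if/elif routing as the new clusterTaskProcess(): one handler per
    # task constant, delete implemented as a stop with delete=True.
    if task == CLUSTER_STOP:
        return cluster_stop(model)
    elif task == CLUSTER_START:
        return cluster_start(model)
    elif task == CLUSTER_RESTART:
        return cluster_restart(model)
    elif task == CLUSTER_DELETE:
        return cluster_stop(model, delete=True)
    return 'An unknown cluster task was requested.'

if __name__ == '__main__':
    urls = build_action_urls('/luci/cluster/index_html', 'CLUSTER_PROCESS', 'mycluster')
    print(urls['restart_url'])
    print(cluster_task_process('mycluster', CLUSTER_RESTART))

The real handlers in cluster_adapters.py iterate over the cluster's nodes, talk to each node's ricci agent, and set batch flags; the sketch only shows how the task constants thread from the URL map through the dispatcher.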
--- conga/luci/cluster/form-macros 2006/11/07 21:33:52 1.101
+++ conga/luci/cluster/form-macros 2006/11/09 20:32:02 1.102
@@ -89,9 +89,27 @@
<td class="cluster cluster_action">
<form method="post" onSubmit="return dropdown(this.gourl)">
<select name="gourl" id="cluster_action" class="cluster">
- <option tal:condition="python: 'running' in cstatus and cstatus['running'] != 'true'" value="" class="cluster running">Start this cluster</option>
- <option tal:condition="python: 'running' in cstatus and cstatus['running'] == 'true'" value="" class="cluster stopped">Stop this cluster</option>
- <option value="" class="cluster">Restart this cluster</option>
+ <option class="cluster running"
+ tal:condition="python: 'running' in cstatus and cstatus['running'] != 'true'"
+ tal:attributes="value cstatus/start_url | nothing">
+ Start this cluster
+ </option>
+
+ <option class="cluster"
+ tal:attributes="value cstatus/restart_url | nothing">
+ Restart this cluster
+ </option>
+
+ <option class="cluster stopped"
+ tal:condition="python: 'running' in cstatus and cstatus['running'] == 'true'"
+ tal:attributes="value cstatus/stop_url | nothing">
+ Stop this cluster
+ </option>
+
+ <option class="cluster stopped"
+ tal:attributes="value cstatus/delete_url | nothing">
+ Delete this cluster
+ </option>
</select>
<input class="cluster" type="submit" value="Go" />
</form>
@@ -1068,11 +1086,9 @@
</div>
<div metal:define-macro="clusterprocess-form">
- <tal:block tal:define="
- global ricci_agent ri_agent | python: here.getRicciAgentForCluster(request)" />
-
<tal:block
- tal:define="res python: here.processClusterProps(ricci_agent, request)" />
+ tal:define="result python: here.clusterTaskProcess(modelb, request)"/>
+ <h2>Cluster Process Form</h2>
</div>
<div metal:define-macro="fence-option-list">
--- conga/luci/site/luci/Extensions/cluster_adapters.py 2006/11/09 14:17:08 1.156
+++ conga/luci/site/luci/Extensions/cluster_adapters.py 2006/11/09 20:32:02 1.157
@@ -112,7 +112,6 @@
def validateCreateCluster(self, request):
errors = list()
- messages = list()
requestResults = {}
if not havePermCreateCluster(self):
@@ -234,7 +233,7 @@
buildClusterCreateFlags(self, batch_id_map, clusterName)
response = request.RESPONSE
- response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clusterName)
+ response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clusterName + '&busyfirst=true')
def buildClusterCreateFlags(self, batch_map, clusterName):
path = str(CLUSTER_FOLDER_PATH + clusterName)
@@ -379,10 +378,11 @@
errors.append('An error occurred while attempting to add cluster node \"' + clunode['host'] + '\"')
return (False, {'errors': errors, 'requestResults': cluster_properties})
- messages.append('Cluster join initiated for host \"' + clunode['host'] + '\"')
-
+ messages.append('Cluster join initiated for host \"' + clunode['host'] + '\"')
buildClusterCreateFlags(self, batch_id_map, clusterName)
- return (True, {'errors': errors, 'messages': messages})
+
+ response = request.RESPONSE
+ response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clusterName + '&busyfirst=true')
def validateServiceAdd(self, request):
try:
@@ -757,23 +757,14 @@
luci_log.debug_verbose('VCC0a: no model, no cluster name')
return (False, {'errors': ['No cluster model was found.']})
- rc = getRicciAgent(self, cluname)
- if not rc:
- luci_log.debug_verbose('VCCb: no model in session, unable to find a ricci agent for the %s cluster' % cluname)
- return (False, {'errors': ['No cluster model was found.']})
-
try:
- model = getModelBuilder(None, rc, rc.dom0())
- if not model:
- raise Exception, 'model is none'
- except Exception, e:
- luci_log.debug_verbose('VCCc: unable to get model builder for cluster %s: %s' % (cluname, str(e)))
+ model = getModelForCluster(self, cluname)
+ except:
model = None
if model is None:
luci_log.debug_verbose('VCC0: unable to get model from session')
return (False, {'errors': ['No cluster model was found.']})
-
try:
if not 'configtype' in request.form:
luci_log.debug_verbose('VCC2: no configtype')
@@ -853,7 +844,7 @@
return (retcode, {'errors': errors, 'messages': messages})
response = request.RESPONSE
- response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clustername)
+ response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clustername + '&busyfirst=true')
def validateFenceAdd(self, request):
return (True, {})
@@ -1419,7 +1410,7 @@
def getClusterAlias(self, model):
alias = model.getClusterAlias()
- if alias == None:
+ if alias is None:
return model.getClusterName()
else:
return alias
@@ -1652,7 +1643,7 @@
svc = modelb.retrieveServiceByName(item['name'])
dom = svc.getAttribute("domain")
- if dom != None:
+ if dom is not None:
itemmap['faildom'] = dom
else:
itemmap['faildom'] = "No Failover Domain"
@@ -1736,7 +1727,7 @@
#first get service by name from model
svc = modelb.getService(servicename)
resource_list = list()
- if svc != None:
+ if svc is not None:
indent_ctr = 0
children = svc.getChildren()
for child in children:
@@ -1751,7 +1742,7 @@
#Call yourself on every children
#then return
rc_map = {}
- if parent != None:
+ if parent is not None:
rc_map['parent'] = parent
rc_map['name'] = child.getName()
if child.isRefObject() == True:
@@ -1968,11 +1959,11 @@
fdom_map['cfgurl'] = baseurl + "?pagetype=" + FDOM_LIST + "&clustername=" + clustername
ordered_attr = fdom.getAttribute('ordered')
restricted_attr = fdom.getAttribute('restricted')
- if ordered_attr != None and (ordered_attr == "true" or ordered_attr == "1"):
+ if ordered_attr is not None and (ordered_attr == "true" or ordered_attr == "1"):
fdom_map['ordered'] = True
else:
fdom_map['ordered'] = False
- if restricted_attr != None and (restricted_attr == "true" or restricted_attr == "1"):
+ if restricted_attr is not None and (restricted_attr == "true" or restricted_attr == "1"):
fdom_map['restricted'] = True
else:
fdom_map['restricted'] = False
@@ -1993,7 +1984,7 @@
else:
nodesmap['status'] = NODE_INACTIVE
priority_attr = node.getAttribute('priority')
- if priority_attr != None:
+ if priority_attr is not None:
nodesmap['priority'] = "0"
nodelist.append(nodesmap)
fdom_map['nodeslist'] = nodelist
@@ -2006,7 +1997,7 @@
break #found more info about service...
domain = svc.getAttribute("domain")
- if domain != None:
+ if domain is not None:
if domain == fdom.getName():
svcmap = {}
svcmap['name'] = svcname
@@ -2018,47 +2009,52 @@
fdomlist.append(fdom_map)
return fdomlist
-def processClusterProps(self, ricci_agent, request):
- #First, retrieve cluster.conf from session
- conf = request.SESSION.get('conf')
- model = ModelBuilder(0, None, None, conf)
-
- #Next, determine actiontype and switch on it
- actiontype = request[ACTIONTYPE]
-
- if actiontype == BASECLUSTER:
- cp = model.getClusterPtr()
- cfgver = cp.getConfigVersion()
-
- rcfgver = request['cfgver']
-
- if cfgver != rcfgver:
- cint = int(cfgver)
- rint = int(rcfgver)
- if rint > cint:
- cp.setConfigVersion(rcfgver)
-
- rname = request['cluname']
- name = model.getClusterAlias()
-
- if rname != name:
- cp.addAttribute('alias', rname)
-
- response = request.RESPONSE
- response.redirect(request['HTTP_REFERER'] + "&busyfirst=true")
- return
+def clusterTaskProcess(self, model, request):
+ try:
+ task = request['task']
+ except:
+ try:
+ task = request.form['task']
+ except:
+ luci_log.debug_verbose('CTP1: no task specified')
+ task = None
- elif actiontype == FENCEDAEMON:
- pass
+ if not model:
+ try:
+ cluname = request['clustername']
+ if not cluname:
+ raise Exception, 'cluname is blank'
+ except:
+ try:
+ cluname = request.form['clustername']
+ if not cluname:
+ raise Exception, 'cluname is blank'
+ except:
+ luci_log.debug_verbose('CTP0: no model/no cluster name')
+ return 'Unable to determine the cluster name.'
+ try:
+ model = getModelForCluster(self, cluname)
+ except Exception, e:
+ luci_log.debug_verbose('CPT1: GMFC failed for %s' % cluname)
+ model = None
- elif actiontype == MULTICAST:
- pass
+ if not model:
+ return 'Unable to get the model object for %s' % cluname
- elif actiontype == QUORUMD:
- pass
+ if task == CLUSTER_STOP:
+ clusterStop(self, model)
+ elif task == CLUSTER_START:
+ clusterStart(self, model)
+ elif task == CLUSTER_RESTART:
+ clusterRestart(self, model)
+ elif task == CLUSTER_DELETE:
+ clusterStop(self, model, delete=True)
+ else:
+ return 'An unknown cluster task was requested.'
- else:
- return
+ response = request.RESPONSE
+ response.redirect('%s?pagetype=%s&clustername=%s&busyfirst=true' \
+ % (request['URL'], CLUSTER, model.getClusterName()))
def getClusterInfo(self, model, req):
try:
@@ -2091,7 +2087,6 @@
luci_log.debug_verbose('GCI3: unable to get model for cluster %s: %s' % cluname, str(e))
return {}
- baseurl = req['URL'] + "?" + PAGETYPE + "=" + CLUSTER_PROCESS + "&" + CLUNAME + "=" + cluname + "&"
prop_baseurl = req['URL'] + '?' + PAGETYPE + '=' + CLUSTER_CONFIG + '&' + CLUNAME + '=' + cluname + '&'
map = {}
basecluster_url = prop_baseurl + PROPERTIES_TAB + "=" + PROP_GENERAL_TAB
@@ -2110,10 +2105,10 @@
map['fencedaemon_url'] = fencedaemon_url
fdp = model.getFenceDaemonPtr()
pjd = fdp.getAttribute('post_join_delay')
- if pjd == None:
+ if pjd is None:
pjd = "6"
pfd = fdp.getAttribute('post_fail_delay')
- if pfd == None:
+ if pfd is None:
pfd = "0"
#post join delay
map['pjd'] = pjd
@@ -2152,27 +2147,27 @@
if is_quorumd:
qdp = model.getQuorumdPtr()
interval = qdp.getAttribute('interval')
- if interval != None:
+ if interval is not None:
map['interval'] = interval
tko = qdp.getAttribute('tko')
- if tko != None:
+ if tko is not None:
map['tko'] = tko
votes = qdp.getAttribute('votes')
- if votes != None:
+ if votes is not None:
map['votes'] = votes
min_score = qdp.getAttribute('min_score')
- if min_score != None:
+ if min_score is not None:
map['min_score'] = min_score
device = qdp.getAttribute('device')
- if device != None:
+ if device is not None:
map['device'] = device
label = qdp.getAttribute('label')
- if label != None:
+ if label is not None:
map['label'] = label
heuristic_kids = qdp.getChildren()
@@ -2180,24 +2175,24 @@
for kid in heuristic_kids:
hmap = {}
hname = kid.getAttribute('name')
- if hname == None:
+ if hname is None:
hname = h_ctr
h_ctr = h_ctr + 1
hprog = kid.getAttribute('program')
hscore = kid.getAttribute('score')
hinterval = kid.getAttribute('interval')
- if hprog == None:
+ if hprog is None:
continue
- if hname != None:
+ if hname is not None:
hmap['hname'] = hname
else:
hmap['hname'] = ""
hmap['hprog'] = hprog
- if hscore != None:
+ if hscore is not None:
hmap['hscore'] = hscore
else:
hmap['hscore'] = ""
- if hinterval != None:
+ if hinterval is not None:
hmap['hinterval'] = hinterval
else:
hmap['hinterval'] = ""
@@ -2239,6 +2234,12 @@
map['votes'] = clu['votes']
map['minquorum'] = clu['minQuorum']
map['clucfg'] = baseurl + "?" + PAGETYPE + "=" + CLUSTER_CONFIG + "&" + CLUNAME + "=" + clustername
+
+ map['restart_url'] = baseurl + "?" + PAGETYPE + "=" + CLUSTER_PROCESS + "&" + CLUNAME + "=" + clustername + '&task=' + CLUSTER_RESTART
+ map['stop_url'] = baseurl + "?" + PAGETYPE + "=" + CLUSTER_PROCESS + "&" + CLUNAME + "=" + clustername + '&task=' + CLUSTER_STOP
+ map['start_url'] = baseurl + "?" + PAGETYPE + "=" + CLUSTER_PROCESS + "&" + CLUNAME + "=" + clustername + '&task=' + CLUSTER_START
+ map['delete_url'] = baseurl + "?" + PAGETYPE + "=" + CLUSTER_PROCESS + "&" + CLUNAME + "=" + clustername + '&task=' + CLUSTER_DELETE
+
svc_dict_list = list()
for svc in svclist:
svc_dict = {}
@@ -2270,6 +2271,317 @@
return map
+def nodeLeave(self, rc, clustername, nodename_resolved):
+ path = str(CLUSTER_FOLDER_PATH + clustername + '/' + nodename_resolved)
+
+ try:
+ nodefolder = self.restrictedTraverse(path)
+ if not nodefolder:
+ raise Exception, 'cannot find database object at %s' % path
+ except Exception, e:
+ luci_log.debug('NLO: node_leave_cluster err: %s' % str(e))
+ return None
+
+ objname = str(nodename_resolved + "____flag")
+ fnpresent = noNodeFlagsPresent(self, nodefolder, objname, nodename_resolved)
+
+ if fnpresent is None:
+ luci_log.debug('NL1: An error occurred while checking flags for %s' \
+ % nodename_resolved)
+ return None
+
+ if fnpresent == False:
+ luci_log.debug('NL2: flags are still present for %s -- bailing out' \
+ % nodename_resolved)
+ return None
+
+ batch_number, result = nodeLeaveCluster(rc)
+ if batch_number is None or result is None:
+ luci_log.debug_verbose('NL3: nodeLeaveCluster error: batch_number and/or result is None')
+ return None
+
+ try:
+ set_node_flag(self, clustername, rc.hostname(), batch_number, NODE_LEAVE_CLUSTER, "Node \'%s\' leaving cluster" % nodename_resolved)
+ except Exception, e:
+ luci_log.debug_verbose('NL4: failed to set flags: %s' % str(e))
+ return True
+
+def nodeJoin(self, rc, clustername, nodename_resolved):
+ batch_number, result = nodeJoinCluster(rc)
+ if batch_number is None or result is None:
+ luci_log.debug_verbose('NJ0: batch_number and/or result is None')
+ return None
+
+ try:
+ set_node_flag(self, clustername, rc.hostname(), batch_number, NODE_JOIN_CLUSTER, "Node \'%s\' joining cluster" % nodename_resolved)
+ except Exception, e:
+ luci_log.debug_verbose('NJ1: failed to set flags: %s' % str(e))
+ return True
+
+def clusterStart(self, model):
+ if model is None:
+ return None
+
+ clustername = model.getClusterName()
+ nodes = model.getNodes()
+ if not nodes or len(nodes) < 1:
+ return None
+
+ errors = 0
+ for node in nodes:
+ nodename = node.getName().strip()
+ nodename_resolved = resolve_nodename(self, clustername, nodename)
+
+ try:
+ rc = RicciCommunicator(nodename_resolved)
+ except Exception, e:
+ luci_log.debug_verbose('CStart: RC %s: %s' \
+ % (nodename_resolved, str(e)))
+ errors += 1
+ continue
+ if nodeJoin(self, rc, clustername, nodename_resolved) is None:
+ luci_log.debug_verbose('CStart1: nodeLeave %s' % nodename_resolved)
+ errors += 1
+
+ return errors
+
+def clusterStop(self, model, delete=False):
+ if model is None:
+ return None
+
+ clustername = model.getClusterName()
+ nodes = model.getNodes()
+ if not nodes or len(nodes) < 1:
+ return None
+
+ errors = 0
+ for node in nodes:
+ nodename = node.getName().strip()
+ nodename_resolved = resolve_nodename(self, clustername, nodename)
+
+ try:
+ rc = RicciCommunicator(nodename_resolved)
+ except Exception, e:
+ luci_log.debug_verbose('[%d] CStop0: RC %s: %s' \
+ % (delete, nodename_resolved, str(e)))
+ errors += 1
+ continue
+ if nodeLeave(self, rc, clustername, nodename_resolved) is None:
+ luci_log.debug_verbose('[%d] CStop1: nodeLeave %s' \
+ % (delete, nodename_resolved))
+ errors += 1
+ return errors
+
+def clusterRestart(self, model):
+ snum_err = clusterStop(self, model)
+ if snum_err:
+ luci_log.debug_verbose('cluRestart0: clusterStop: %d errs' % snum_err)
+ jnum_err = clusterStart(self, model)
+ if jnum_err:
+ luci_log.debug_verbose('cluRestart0: clusterStart: %d errs' % jnum_err)
+ return snum_err + jnum_err
+
+def clusterDelete(self, model):
+ return clusterStop(self, model, delete=True)
+
+def forceNodeReboot(self, rc, clustername, nodename_resolved):
+ batch_number, result = nodeReboot(rc)
+ if batch_number is None or result is None:
+ luci_log.debug_verbose('FNR0: batch_number and/or result is None')
+ return None
+
+ try:
+ set_node_flag(self, clustername, rc.hostname(), batch_number, NODE_REBOOT, "Node \'%s\' is being rebooted" % nodename_resolved)
+ except Exception, e:
+ luci_log.debug_verbose('FNR1: failed to set flags: %s' % str(e))
+ return True
+
+def forceNodeFence(self, clustername, nodename, nodename_resolved):
+ path = str(CLUSTER_FOLDER_PATH + clustername)
+
+ try:
+ clusterfolder = self.restrictedTraverse(path)
+ if not clusterfolder:
+ raise Exception, 'no cluster folder at %s' % path
+ except Exception, e:
+ luci_log.debug('FNF0: The cluster folder %s could not be found: %s' \
+ % (clustername, str(e)))
+ return None
+
+ try:
+ nodes = clusterfolder.objectItems('Folder')
+ if not nodes or len(nodes) < 1:
+ raise Exception, 'no cluster nodes'
+ except Exception, e:
+ luci_log.debug('FNF1: No cluster nodes for %s were found: %s' \
+ % (clustername, str(e)))
+ return None
+
+ found_one = False
+ for node in nodes:
+ if node[1].getId().find(nodename) != (-1):
+ continue
+
+ try:
+ rc = RicciCommunicator(node[1].getId())
+ if not rc:
+ raise Exception, 'rc is None'
+ except Exception, e:
+ luci_log.debug('FNF2: ricci error for host %s: %s' \
+ % (node[0], str(e)))
+ continue
+
+ if not rc.authed():
+ rc = None
+ try:
+ snode = getStorageNode(self, node[1].getId())
+ setNodeFlag(snode, CLUSTER_NODE_NEED_AUTH)
+ except:
+ pass
+
+ try:
+ setNodeFlag(node[1], CLUSTER_NODE_NEED_AUTH)
+ except:
+ pass
+
+ continue
+ found_one = True
+ break
+
+ if not found_one:
+ return None
+
+ batch_number, result = nodeFence(rc, nodename)
+ if batch_number is None or result is None:
+ luci_log.debug_verbose('FNF3: batch_number and/or result is None')
+ return None
+
+ try:
+ set_node_flag(self, clustername, rc.hostname(), batch_number, NODE_FENCE, "Node \'%s\' is being fenced" % nodename_resolved)
+ except Exception, e:
+ luci_log.debug_verbose('FNF4: failed to set flags: %s' % str(e))
+ return True
+
+def nodeDelete(self, rc, model, clustername, nodename, nodename_resolved):
+ #We need to get a node name other than the node
+ #to be deleted, then delete the node from the cluster.conf
+ #and propogate it. We will need two ricci agents for this task.
+
+ # Make sure we can find a second node before we hose anything.
+ path = str(CLUSTER_FOLDER_PATH + clustername)
+ try:
+ clusterfolder = self.restrictedTraverse(path)
+ if not clusterfolder:
+ raise Exception, 'no cluster folder at %s' % path
+ except Exception, e:
+ luci_log.debug_verbose('ND0: node delete error for cluster %s: %s' \
+ % (clustername, str(e)))
+ return None
+
+ try:
+ nodes = clusterfolder.objectItems('Folder')
+ if not nodes or len(nodes) < 1:
+ raise Exception, 'no cluster nodes in DB'
+ except Exception, e:
+ luci_log.debug_verbose('ND1: node delete error for cluster %s: %s' \
+ % (clustername, str(e)))
+
+ found_one = False
+ for node in nodes:
+ if node[1].getId().find(nodename) != (-1):
+ continue
+ #here we make certain the node is up...
+ # XXX- we should also make certain this host is still
+ # in the cluster we believe it is.
+ try:
+ rc2 = RicciCommunicator(node[1].getId())
+ except Exception, e:
+ luci_log.info('ND2: ricci %s error: %s' % (node[0], str(e)))
+ continue
+
+ if not rc2.authed():
+ try:
+ setNodeFlag(node[1], CLUSTER_NODE_NEED_AUTH)
+ except:
+ pass
+
+ try:
+ snode = getStorageNode(self, node[0])
+ setNodeFlag(snode, CLUSTER_NODE_NEED_AUTH)
+ except:
+ pass
+
+ luci_log.debug_verbose('ND3: %s is not authed' % node[0])
+ rc2 = None
+ continue
+ else:
+ found_one = True
+ break
+
+ if not found_one:
+ luci_log.debug_verbose('ND4: unable to find ricci agent to delete %s from %s' % (nodename, clustername))
+ return None
+
+ #First, delete cluster.conf from node to be deleted.
+ #next, have node leave cluster.
+ batch_number, result = nodeLeaveCluster(rc, purge=True)
+ if batch_number is None or result is None:
+ luci_log.debug_verbose('ND5: batch_number and/or result is None')
+ return None
+
+ #It is not worth flagging this node in DB, as we are going
+ #to delete it anyway. Now, we need to delete node from model
+ #and send out new cluster.conf
+ delete_target = None
+ nodelist = model.getNodes()
+ find_node = lower(nodename)
+ for n in nodelist:
+ try:
+ if lower(n.getName()) == find_node:
+ delete_target = n
+ break
+ except:
+ continue
+
+ if delete_target is None:
+ luci_log.debug_verbose('ND6: unable to find delete target for %s in %s' \
+ % (nodename, clustername))
+ return None
+
+ model.deleteNode(delete_target)
+
+ try:
+ str_buf = model.exportModelAsString()
+ if not str_buf:
+ raise Exception, 'model string is blank'
+ except Exception, e:
+ luci_log.debug_verbose('ND7: exportModelAsString: %s' % str(e))
+ return None
+
+ # propagate the new cluster.conf via the second node
+ batch_number, result = setClusterConf(rc2, str(str_buf))
+ if batch_number is None:
+ luci_log.debug_verbose('ND8: batch number is None after del node in NTP')
+ return None
+
+ #Now we need to delete the node from the DB
+ path = str(CLUSTER_FOLDER_PATH + clustername)
+ del_path = str(path + '/' + nodename_resolved)
+
+ try:
+ delnode = self.restrictedTraverse(del_path)
+ clusterfolder = self.restrictedTraverse(path)
+ clusterfolder.manage_delObjects(delnode[0])
+ except Exception, e:
+ luci_log.debug_verbose('ND9: error deleting %s: %s' \
+ % (del_path, str(e)))
+
+ try:
+ set_node_flag(self, clustername, rc2.hostname(), batch_number, NODE_DELETE, "Deleting node \'%s\'" % nodename_resolved)
+ except Exception, e:
+ luci_log.debug_verbose('ND10: failed to set flags: %s' % str(e))
+ return True
+
def nodeTaskProcess(self, model, request):
try:
clustername = request['clustername']
@@ -2345,312 +2657,41 @@
return None
if task == NODE_LEAVE_CLUSTER:
- path = str(CLUSTER_FOLDER_PATH + clustername + "/" + nodename_resolved)
-
- try:
- nodefolder = self.restrictedTraverse(path)
- if not nodefolder:
- raise Exception, 'cannot find directory at %s' % path
- except Exception, e:
- luci_log.debug('node_leave_cluster err: %s' % str(e))
- return None
-
- objname = str(nodename_resolved + "____flag")
-
- fnpresent = noNodeFlagsPresent(self, nodefolder, objname, nodename_resolved)
- if fnpresent is None:
- luci_log.debug('An error occurred while checking flags for %s' \
- % nodename_resolved)
+ if nodeLeave(self, rc, clustername, nodename_resolved) is None:
+ luci_log.debug_verbose('NTP: nodeLeave failed')
return None
- if fnpresent == False:
- luci_log.debug('flags are still present for %s -- bailing out' \
- % nodename_resolved)
- return None
-
- batch_number, result = nodeLeaveCluster(rc)
- if batch_number is None or result is None:
- luci_log.debug_verbose('nodeLeaveCluster error: batch_number and/or result is None')
- return None
-
- batch_id = str(batch_number)
- objpath = str(path + "/" + objname)
-
- try:
- nodefolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname)
- #Now we need to annotate the new DB object
- flag = self.restrictedTraverse(objpath)
- flag.manage_addProperty(BATCH_ID, batch_id, "string")
- flag.manage_addProperty(TASKTYPE, NODE_LEAVE_CLUSTER, "string")
- flag.manage_addProperty(FLAG_DESC, "Node \'" + nodename + "\' leaving cluster", "string")
- except:
- luci_log.debug('An error occurred while setting flag %s' % objpath)
-
- response = request.RESPONSE
#Is this correct? Should we re-direct to the cluster page?
+ response = request.RESPONSE
response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clustername)
elif task == NODE_JOIN_CLUSTER:
- batch_number, result = nodeJoinCluster(rc)
- if batch_number is None or result is None:
- luci_log.debug_verbose('nodeJoin error: batch_number and/or result is None')
+ if nodeJoin(self, rc, clustername, nodename_resolved) is None:
+ luci_log.debug_verbose('NTP: nodeJoin failed')
return None
- path = str(CLUSTER_FOLDER_PATH + clustername + "/" + nodename_resolved)
- batch_id = str(batch_number)
- objname = str(nodename_resolved + "____flag")
- objpath = str(path + "/" + objname)
-
- try:
- nodefolder = self.restrictedTraverse(path)
- nodefolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname)
- #Now we need to annotate the new DB object
- flag = self.restrictedTraverse(objpath)
- flag.manage_addProperty(BATCH_ID, batch_id, "string")
- flag.manage_addProperty(TASKTYPE, NODE_JOIN_CLUSTER, "string")
- flag.manage_addProperty(FLAG_DESC, "Node \'" + nodename + "\' joining cluster", "string")
- except Exception, e:
- luci_log.debug_verbose('nodeJoin error: creating flags at %s: %s' \
- % (path, str(e)))
-
- response = request.RESPONSE
#Once again, is this correct? Should we re-direct to the cluster page?
+ response = request.RESPONSE
response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clustername)
elif task == NODE_REBOOT:
- batch_number, result = nodeReboot(rc)
- if batch_number is None or result is None:
- luci_log.debug_verbose('nodeReboot: batch_number and/or result is None')
+ if forceNodeReboot(self, rc, clustername, nodename_resolved) is None:
+ luci_log.debug_verbose('NTP: nodeReboot failed')
return None
- path = str(CLUSTER_FOLDER_PATH + clustername + "/" + nodename_resolved)
- batch_id = str(batch_number)
- objname = str(nodename_resolved + "____flag")
- objpath = str(path + "/" + objname)
-
- try:
- nodefolder = self.restrictedTraverse(path)
- nodefolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname)
- #Now we need to annotate the new DB object
- flag = self.restrictedTraverse(objpath)
- flag.manage_addProperty(BATCH_ID, batch_id, "string")
- flag.manage_addProperty(TASKTYPE, NODE_REBOOT, "string")
- flag.manage_addProperty(FLAG_DESC, "Node \'" + nodename + "\' is being rebooted", "string")
- except Exception, e:
- luci_log.debug_verbose('nodeReboot err: creating flags at %s: %s' \
- % (path, str(e)))
-
- response = request.RESPONSE
#Once again, is this correct? Should we re-direct to the cluster page?
+ response = request.RESPONSE
response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clustername)
elif task == NODE_FENCE:
- #here, we DON'T want to open connection to node to be fenced.
- path = str(CLUSTER_FOLDER_PATH + clustername)
- try:
- clusterfolder = self.restrictedTraverse(path)
- if not clusterfolder:
- raise Exception, 'no cluster folder at %s' % path
- except Exception, e:
- luci_log.debug('The cluster folder for %s could not be found: %s' \
- % (clustername, str(e)))
+ if forceNodeFence(self, clustername, nodename, nodename_resolved) is None:
+ luci_log.debug_verbose('NTP: nodeFencefailed')
return None
- try:
- nodes = clusterfolder.objectItems('Folder')
- if not nodes or len(nodes) < 1:
- raise Exception, 'no cluster nodes'
- except Exception, e:
- luci_log.debug('No cluster nodes for %s were found: %s' \
- % (clustername, str(e)))
- return None
-
- found_one = False
- for node in nodes:
- if node[1].getId().find(nodename) != (-1):
- continue
-
- try:
- rc = RicciCommunicator(node[1].getId())
- if not rc:
- raise Exception, 'rc is None'
- except Exception, e:
- luci_log.debug('ricci error for host %s: %s' \
- % (node[0], str(e)))
- continue
-
- if not rc.authed():
- rc = None
- try:
- snode = getStorageNode(self, node[1].getId())
- setNodeFlag(snode, CLUSTER_NODE_NEED_AUTH)
- except:
- pass
-
- try:
- setNodeFlag(node[1], CLUSTER_NODE_NEED_AUTH)
- except:
- pass
-
- continue
- found_one = True
- break
-
- if not found_one:
- return None
-
- batch_number, result = nodeFence(rc, nodename)
- if batch_number is None or result is None:
- luci_log.debug_verbose('nodeFence: batch_number and/or result is None')
- return None
-
- path = str(path + "/" + nodename_resolved)
- batch_id = str(batch_number)
- objname = str(nodename_resolved + "____flag")
- objpath = str(path + "/" + objname)
-
- try:
- nodefolder = self.restrictedTraverse(path)
- nodefolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname)
- #Now we need to annotate the new DB object
- flag = self.restrictedTraverse(objpath)
- flag.manage_addProperty(BATCH_ID, batch_id, "string")
- flag.manage_addProperty(TASKTYPE, NODE_FENCE, "string")
- flag.manage_addProperty(FLAG_DESC, "Node \'" + nodename + "\' is being fenced", "string")
- except Exception, e:
- luci_log.debug_verbose('nodeFence err: creating flags at %s: %s' \
- % (path, str(e)))
-
- response = request.RESPONSE
#Once again, is this correct? Should we re-direct to the cluster page?
+ response = request.RESPONSE
response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clustername)
elif task == NODE_DELETE:
- #We need to get a node name other than the node
- #to be deleted, then delete the node from the cluster.conf
- #and propogate it. We will need two ricci agents for this task.
-
- # Make sure we can find a second node before we hose anything.
- path = str(CLUSTER_FOLDER_PATH + clustername)
- try:
- clusterfolder = self.restrictedTraverse(path)
- if not clusterfolder:
- raise Exception, 'no cluster folder at %s' % path
- except Exception, e:
- luci_log.debug_verbose('node delete error for cluster %s: %s' \
- % (clustername, str(e)))
- return None
-
- try:
- nodes = clusterfolder.objectItems('Folder')
- if not nodes or len(nodes) < 1:
- raise Exception, 'no cluster nodes in DB'
- except Exception, e:
- luci_log.debug_verbose('node delete error for cluster %s: %s' \
- % (clustername, str(e)))
-
- found_one = False
- for node in nodes:
- if node[1].getId().find(nodename) != (-1):
- continue
- #here we make certain the node is up...
- # XXX- we should also make certain this host is still
- # in the cluster we believe it is.
- try:
- rc2 = RicciCommunicator(node[1].getId())
- except Exception, e:
- luci_log.info('ricci %s error: %s' % (node[0], str(e)))
- continue
- except:
- continue
-
- if not rc2.authed():
- try:
- setNodeFlag(node[1], CLUSTER_NODE_NEED_AUTH)
- except:
- pass
-
- try:
- snode = getStorageNode(self, node[0])
- setNodeFlag(snode, CLUSTER_NODE_NEED_AUTH)
- except:
- pass
-
- luci_log.debug_verbose('%s is not authed' % node[0])
- rc2 = None
- continue
- else:
- found_one = True
- break
-
- if not found_one:
- luci_log.debug_verbose('unable to find ricci node to delete %s from %s' % (nodename, clustername))
+ if nodeDelete(self, rc, model, clustername, nodename, nodename_resolved) is None:
+ luci_log.debug_verbose('NTP: nodeDelete failed')
return None
-
- #First, delete cluster.conf from node to be deleted.
- #next, have node leave cluster.
- batch_number, result = nodeLeaveCluster(rc, purge=True)
- if batch_number is None or result is None:
- luci_log.debug_verbose('nodeDelete: batch_number and/or result is None')
- return None
-
- #It is not worth flagging this node in DB, as we are going
- #to delete it anyway. Now, we need to delete node from model
- #and send out new cluster.conf
- delete_target = None
- nodelist = model.getNodes()
- find_node = lower(nodename)
- for n in nodelist:
- try:
- if lower(n.getName()) == find_node:
- delete_target = n
- break
- except:
- continue
-
- if delete_target is None:
- luci_log.debug_verbose('unable to find delete target for %s in %s' \
- % (nodename, clustername))
- return None
-
- model.deleteNode(delete_target)
-
- try:
- str_buf = model.exportModelAsString()
- if not str_buf:
- raise Exception, 'model string is blank'
- except Exception, e:
- luci_log.debug_verbose('NTP exportModelAsString: %s' % str(e))
- return None
-
- # propagate the new cluster.conf via the second node
- batch_number, result = setClusterConf(rc2, str(str_buf))
- if batch_number is None:
- luci_log.debug_verbose('batch number is None after del node in NTP')
- return None
-
- #Now we need to delete the node from the DB
- path = str(CLUSTER_FOLDER_PATH + clustername)
- del_path = str(path + "/" + nodename_resolved)
-
- try:
- delnode = self.restrictedTraverse(del_path)
- clusterfolder = self.restrictedTraverse(path)
- clusterfolder.manage_delObjects(delnode[0])
- except Exception, e:
- luci_log.debug_verbose('error deleting %s: %s' % (del_path, str(e)))
-
- batch_id = str(batch_number)
- objname = str(nodename_resolved + "____flag")
- objpath = str(path + "/" + objname)
-
- try:
- clusterfolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname)
- #Now we need to annotate the new DB object
- flag = self.restrictedTraverse(objpath)
- flag.manage_addProperty(BATCH_ID, batch_id, "string")
- flag.manage_addProperty(TASKTYPE, NODE_DELETE, "string")
- flag.manage_addProperty(FLAG_DESC, "Deleting node \'" + nodename + "\'", "string")
- except Exception, e:
- luci_log.debug_verbose('nodeDelete %s err setting flag at %s: %s' \
- % (nodename, objpath, str(e)))
-
response = request.RESPONSE
response.redirect(request['HTTP_REFERER'] + "&busyfirst=true")
@@ -2951,7 +2992,8 @@
except:
fd = None #Set to None in case last time thru loop
continue
- if fd != None:
+
+ if fd is not None:
if fd.isShared() == False: #Not a shared dev...build struct and add
fencedev = {}
fencedev['prettyname'] = FENCE_OPTS[fd.getAgentType()]
@@ -2974,7 +3016,7 @@
last_kid_fd = None
level1.append(fencedev)
else: #This dev is shared
- if (last_kid_fd != None) and (fd.getName().strip() == last_kid_fd.getName().strip()): #just append a new instance struct to last_kid_fd
+ if (last_kid_fd is not None) and (fd.getName().strip() == last_kid_fd.getName().strip()): #just append a new instance struct to last_kid_fd
instance_struct = {}
instance_struct['id'] = str(minor_num)
minor_num = minor_num + 1
@@ -3045,7 +3087,7 @@
except:
fd = None #Set to None in case last time thru loop
continue
- if fd != None:
+ if fd is not None:
if fd.isShared() == False: #Not a shared dev...build struct and add
fencedev = {}
fencedev['prettyname'] = FENCE_OPTS[fd.getAgentType()]
@@ -3068,7 +3110,7 @@
last_kid_fd = None
level2.append(fencedev)
else: #This dev is shared
- if (last_kid_fd != None) and (fd.getName().strip() == last_kid_fd.getName().strip()): #just append a new instance struct to last_kid_fd
+ if (last_kid_fd is not None) and (fd.getName().strip() == last_kid_fd.getName().strip()): #just append a new instance struct to last_kid_fd
instance_struct = {}
instance_struct['id'] = str(minor_num)
minor_num = minor_num + 1
@@ -3584,7 +3626,7 @@
def getResourceInfo(modelb, request):
if not modelb:
- luci_log.debug_verbose('no modelb obj in getResourceInfo')
+ luci_log.debug_verbose('GRI0: no modelb object in session')
return {}
name = None
@@ -4539,6 +4581,24 @@
modelb.setIsVirtualized(isVirtualized)
return modelb
+def getModelForCluster(self, clustername):
+ rc = getRicciAgent(self, clustername)
+ if not rc:
+ luci_log.debug_verbose('GMFC0: unable to find a ricci agent for %s' \
+ % clustername)
+ return None
+
+ try:
+ model = getModelBuilder(None, rc, rc.dom0())
+ if not model:
+ raise Exception, 'model is none'
+ except Exception, e:
+ luci_log.debug_verbose('GMFC1: unable to get model builder for %s: %s' \
+ % (clustername, str(e)))
+ return None
+
+ return model
+
def set_node_flag(self, cluname, agent, batchid, task, desc):
path = str(CLUSTER_FOLDER_PATH + cluname)
batch_id = str(batchid)
@@ -4551,7 +4611,7 @@
flag = self.restrictedTraverse(objpath)
flag.manage_addProperty(BATCH_ID, batch_id, 'string')
flag.manage_addProperty(TASKTYPE, task, 'string')
- flag.manage_addProperty(FLAG_DESC, desc)
+ flag.manage_addProperty(FLAG_DESC, desc, 'string')
except Exception, e:
errmsg = 'Error creating flag (%s,%s,%s)@%s: %s' \
% (batch_id, task, desc, objpath, str(e))
--- conga/luci/site/luci/Extensions/conga_constants.py 2006/11/06 23:55:23 1.23
+++ conga/luci/site/luci/Extensions/conga_constants.py 2006/11/09 20:32:02 1.24
@@ -43,6 +43,12 @@
FENCEDEV_CONFIG="53"
FENCEDEV="54"
+#Cluster tasks
+CLUSTER_STOP = '1000'
+CLUSTER_START = '1001'
+CLUSTER_RESTART = '1002'
+CLUSTER_DELETE = '1003'
+
#General tasks
NODE_LEAVE_CLUSTER="100"
NODE_JOIN_CLUSTER="101"