From: rmccabe@sourceware.org <rmccabe@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] conga/luci cluster/form-macros site/luci/Exten ...
Date: 9 Nov 2006 20:32:03 -0000
Message-ID: <20061109203203.11621.qmail@sourceware.org>
CVSROOT: /cvs/cluster
Module name: conga
Changes by: rmccabe at sourceware.org 2006-11-09 20:32:02
Modified files:
luci/cluster : form-macros
luci/site/luci/Extensions: cluster_adapters.py
conga_constants.py
Log message:
fix the cluster start/stop/restart/delete actions in the actions menu so they do what they're supposed to (as opposed to nothing)
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/cluster/form-macros.diff?cvsroot=cluster&r1=1.101&r2=1.102
http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/cluster_adapters.py.diff?cvsroot=cluster&r1=1.156&r2=1.157
http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/conga_constants.py.diff?cvsroot=cluster&r1=1.23&r2=1.24
--- conga/luci/cluster/form-macros 2006/11/07 21:33:52 1.101
+++ conga/luci/cluster/form-macros 2006/11/09 20:32:02 1.102
@@ -89,9 +89,27 @@
<td class="cluster cluster_action">
<form method="post" onSubmit="return dropdown(this.gourl)">
<select name="gourl" id="cluster_action" class="cluster">
- <option tal:condition="python: 'running' in cstatus and cstatus['running'] != 'true'" value="" class="cluster running">Start this cluster</option>
- <option tal:condition="python: 'running' in cstatus and cstatus['running'] == 'true'" value="" class="cluster stopped">Stop this cluster</option>
- <option value="" class="cluster">Restart this cluster</option>
+ <option class="cluster running"
+ tal:condition="python: 'running' in cstatus and cstatus['running'] != 'true'"
+ tal:attributes="value cstatus/start_url | nothing">
+ Start this cluster
+ </option>
+
+ <option class="cluster"
+ tal:attributes="value cstatus/restart_url | nothing">
+ Restart this cluster
+ </option>
+
+ <option class="cluster stopped"
+ tal:condition="python: 'running' in cstatus and cstatus['running'] == 'true'"
+ tal:attributes="value cstatus/stop_url | nothing">
+ Stop this cluster
+ </option>
+
+ <option class="cluster stopped"
+ tal:attributes="value cstatus/delete_url | nothing">
+ Delete this cluster
+ </option>
</select>
<input class="cluster" type="submit" value="Go" />
</form>
@@ -1068,11 +1086,9 @@
</div>
<div metal:define-macro="clusterprocess-form">
- <tal:block tal:define="
- global ricci_agent ri_agent | python: here.getRicciAgentForCluster(request)" />
-
<tal:block
- tal:define="res python: here.processClusterProps(ricci_agent, request)" />
+ tal:define="result python: here.clusterTaskProcess(modelb, request)"/>
+ <h2>Cluster Process Form</h2>
</div>
<div metal:define-macro="fence-option-list">
--- conga/luci/site/luci/Extensions/cluster_adapters.py 2006/11/09 14:17:08 1.156
+++ conga/luci/site/luci/Extensions/cluster_adapters.py 2006/11/09 20:32:02 1.157
@@ -112,7 +112,6 @@
def validateCreateCluster(self, request):
errors = list()
- messages = list()
requestResults = {}
if not havePermCreateCluster(self):
@@ -234,7 +233,7 @@
buildClusterCreateFlags(self, batch_id_map, clusterName)
response = request.RESPONSE
- response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clusterName)
+ response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clusterName + '&busyfirst=true')
def buildClusterCreateFlags(self, batch_map, clusterName):
path = str(CLUSTER_FOLDER_PATH + clusterName)
@@ -379,10 +378,11 @@
errors.append('An error occurred while attempting to add cluster node \"' + clunode['host'] + '\"')
return (False, {'errors': errors, 'requestResults': cluster_properties})
- messages.append('Cluster join initiated for host \"' + clunode['host'] + '\"')
-
+ messages.append('Cluster join initiated for host \"' + clunode['host'] + '\"')
buildClusterCreateFlags(self, batch_id_map, clusterName)
- return (True, {'errors': errors, 'messages': messages})
+
+ response = request.RESPONSE
+ response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clusterName + '&busyfirst=true')
def validateServiceAdd(self, request):
try:
@@ -757,23 +757,14 @@
luci_log.debug_verbose('VCC0a: no model, no cluster name')
return (False, {'errors': ['No cluster model was found.']})
- rc = getRicciAgent(self, cluname)
- if not rc:
- luci_log.debug_verbose('VCCb: no model in session, unable to find a ricci agent for the %s cluster' % cluname)
- return (False, {'errors': ['No cluster model was found.']})
-
try:
- model = getModelBuilder(None, rc, rc.dom0())
- if not model:
- raise Exception, 'model is none'
- except Exception, e:
- luci_log.debug_verbose('VCCc: unable to get model builder for cluster %s: %s' % (cluname, str(e)))
+ model = getModelForCluster(self, cluname)
+ except:
model = None
if model is None:
luci_log.debug_verbose('VCC0: unable to get model from session')
return (False, {'errors': ['No cluster model was found.']})
-
try:
if not 'configtype' in request.form:
luci_log.debug_verbose('VCC2: no configtype')
@@ -853,7 +844,7 @@
return (retcode, {'errors': errors, 'messages': messages})
response = request.RESPONSE
- response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clustername)
+ response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clustername + '&busyfirst=true')
def validateFenceAdd(self, request):
return (True, {})
@@ -1419,7 +1410,7 @@
def getClusterAlias(self, model):
alias = model.getClusterAlias()
- if alias == None:
+ if alias is None:
return model.getClusterName()
else:
return alias
@@ -1652,7 +1643,7 @@
svc = modelb.retrieveServiceByName(item['name'])
dom = svc.getAttribute("domain")
- if dom != None:
+ if dom is not None:
itemmap['faildom'] = dom
else:
itemmap['faildom'] = "No Failover Domain"
@@ -1736,7 +1727,7 @@
#first get service by name from model
svc = modelb.getService(servicename)
resource_list = list()
- if svc != None:
+ if svc is not None:
indent_ctr = 0
children = svc.getChildren()
for child in children:
@@ -1751,7 +1742,7 @@
#Call yourself on every children
#then return
rc_map = {}
- if parent != None:
+ if parent is not None:
rc_map['parent'] = parent
rc_map['name'] = child.getName()
if child.isRefObject() == True:
@@ -1968,11 +1959,11 @@
fdom_map['cfgurl'] = baseurl + "?pagetype=" + FDOM_LIST + "&clustername=" + clustername
ordered_attr = fdom.getAttribute('ordered')
restricted_attr = fdom.getAttribute('restricted')
- if ordered_attr != None and (ordered_attr == "true" or ordered_attr == "1"):
+ if ordered_attr is not None and (ordered_attr == "true" or ordered_attr == "1"):
fdom_map['ordered'] = True
else:
fdom_map['ordered'] = False
- if restricted_attr != None and (restricted_attr == "true" or restricted_attr == "1"):
+ if restricted_attr is not None and (restricted_attr == "true" or restricted_attr == "1"):
fdom_map['restricted'] = True
else:
fdom_map['restricted'] = False
@@ -1993,7 +1984,7 @@
else:
nodesmap['status'] = NODE_INACTIVE
priority_attr = node.getAttribute('priority')
- if priority_attr != None:
+ if priority_attr is not None:
nodesmap['priority'] = "0"
nodelist.append(nodesmap)
fdom_map['nodeslist'] = nodelist
@@ -2006,7 +1997,7 @@
break #found more info about service...
domain = svc.getAttribute("domain")
- if domain != None:
+ if domain is not None:
if domain == fdom.getName():
svcmap = {}
svcmap['name'] = svcname
@@ -2018,47 +2009,52 @@
fdomlist.append(fdom_map)
return fdomlist
-def processClusterProps(self, ricci_agent, request):
- #First, retrieve cluster.conf from session
- conf = request.SESSION.get('conf')
- model = ModelBuilder(0, None, None, conf)
-
- #Next, determine actiontype and switch on it
- actiontype = request[ACTIONTYPE]
-
- if actiontype == BASECLUSTER:
- cp = model.getClusterPtr()
- cfgver = cp.getConfigVersion()
-
- rcfgver = request['cfgver']
-
- if cfgver != rcfgver:
- cint = int(cfgver)
- rint = int(rcfgver)
- if rint > cint:
- cp.setConfigVersion(rcfgver)
-
- rname = request['cluname']
- name = model.getClusterAlias()
-
- if rname != name:
- cp.addAttribute('alias', rname)
-
- response = request.RESPONSE
- response.redirect(request['HTTP_REFERER'] + "&busyfirst=true")
- return
+def clusterTaskProcess(self, model, request):
+ try:
+ task = request['task']
+ except:
+ try:
+ task = request.form['task']
+ except:
+ luci_log.debug_verbose('CTP1: no task specified')
+ task = None
- elif actiontype == FENCEDAEMON:
- pass
+ if not model:
+ try:
+ cluname = request['clustername']
+ if not cluname:
+ raise Exception, 'cluname is blank'
+ except:
+ try:
+ cluname = request.form['clustername']
+ if not cluname:
+ raise Exception, 'cluname is blank'
+ except:
+ luci_log.debug_verbose('CTP0: no model/no cluster name')
+ return 'Unable to determine the cluster name.'
+ try:
+ model = getModelForCluster(self, cluname)
+ except Exception, e:
+ luci_log.debug_verbose('CPT1: GMFC failed for %s' % cluname)
+ model = None
- elif actiontype == MULTICAST:
- pass
+ if not model:
+ return 'Unable to get the model object for %s' % cluname
- elif actiontype == QUORUMD:
- pass
+ if task == CLUSTER_STOP:
+ clusterStop(self, model)
+ elif task == CLUSTER_START:
+ clusterStart(self, model)
+ elif task == CLUSTER_RESTART:
+ clusterRestart(self, model)
+ elif task == CLUSTER_DELETE:
+ clusterStop(self, model, delete=True)
+ else:
+ return 'An unknown cluster task was requested.'
- else:
- return
+ response = request.RESPONSE
+ response.redirect('%s?pagetype=%s&clustername=%s&busyfirst=true' \
+ % (request['URL'], CLUSTER, model.getClusterName()))
def getClusterInfo(self, model, req):
try:
@@ -2091,7 +2087,6 @@
luci_log.debug_verbose('GCI3: unable to get model for cluster %s: %s' % cluname, str(e))
return {}
- baseurl = req['URL'] + "?" + PAGETYPE + "=" + CLUSTER_PROCESS + "&" + CLUNAME + "=" + cluname + "&"
prop_baseurl = req['URL'] + '?' + PAGETYPE + '=' + CLUSTER_CONFIG + '&' + CLUNAME + '=' + cluname + '&'
map = {}
basecluster_url = prop_baseurl + PROPERTIES_TAB + "=" + PROP_GENERAL_TAB
@@ -2110,10 +2105,10 @@
map['fencedaemon_url'] = fencedaemon_url
fdp = model.getFenceDaemonPtr()
pjd = fdp.getAttribute('post_join_delay')
- if pjd == None:
+ if pjd is None:
pjd = "6"
pfd = fdp.getAttribute('post_fail_delay')
- if pfd == None:
+ if pfd is None:
pfd = "0"
#post join delay
map['pjd'] = pjd
@@ -2152,27 +2147,27 @@
if is_quorumd:
qdp = model.getQuorumdPtr()
interval = qdp.getAttribute('interval')
- if interval != None:
+ if interval is not None:
map['interval'] = interval
tko = qdp.getAttribute('tko')
- if tko != None:
+ if tko is not None:
map['tko'] = tko
votes = qdp.getAttribute('votes')
- if votes != None:
+ if votes is not None:
map['votes'] = votes
min_score = qdp.getAttribute('min_score')
- if min_score != None:
+ if min_score is not None:
map['min_score'] = min_score
device = qdp.getAttribute('device')
- if device != None:
+ if device is not None:
map['device'] = device
label = qdp.getAttribute('label')
- if label != None:
+ if label is not None:
map['label'] = label
heuristic_kids = qdp.getChildren()
@@ -2180,24 +2175,24 @@
for kid in heuristic_kids:
hmap = {}
hname = kid.getAttribute('name')
- if hname == None:
+ if hname is None:
hname = h_ctr
h_ctr = h_ctr + 1
hprog = kid.getAttribute('program')
hscore = kid.getAttribute('score')
hinterval = kid.getAttribute('interval')
- if hprog == None:
+ if hprog is None:
continue
- if hname != None:
+ if hname is not None:
hmap['hname'] = hname
else:
hmap['hname'] = ""
hmap['hprog'] = hprog
- if hscore != None:
+ if hscore is not None:
hmap['hscore'] = hscore
else:
hmap['hscore'] = ""
- if hinterval != None:
+ if hinterval is not None:
hmap['hinterval'] = hinterval
else:
hmap['hinterval'] = ""
@@ -2239,6 +2234,12 @@
map['votes'] = clu['votes']
map['minquorum'] = clu['minQuorum']
map['clucfg'] = baseurl + "?" + PAGETYPE + "=" + CLUSTER_CONFIG + "&" + CLUNAME + "=" + clustername
+
+ map['restart_url'] = baseurl + "?" + PAGETYPE + "=" + CLUSTER_PROCESS + "&" + CLUNAME + "=" + clustername + '&task=' + CLUSTER_RESTART
+ map['stop_url'] = baseurl + "?" + PAGETYPE + "=" + CLUSTER_PROCESS + "&" + CLUNAME + "=" + clustername + '&task=' + CLUSTER_STOP
+ map['start_url'] = baseurl + "?" + PAGETYPE + "=" + CLUSTER_PROCESS + "&" + CLUNAME + "=" + clustername + '&task=' + CLUSTER_START
+ map['delete_url'] = baseurl + "?" + PAGETYPE + "=" + CLUSTER_PROCESS + "&" + CLUNAME + "=" + clustername + '&task=' + CLUSTER_DELETE
+
svc_dict_list = list()
for svc in svclist:
svc_dict = {}
@@ -2270,6 +2271,317 @@
return map
+def nodeLeave(self, rc, clustername, nodename_resolved):
+ path = str(CLUSTER_FOLDER_PATH + clustername + '/' + nodename_resolved)
+
+ try:
+ nodefolder = self.restrictedTraverse(path)
+ if not nodefolder:
+ raise Exception, 'cannot find database object at %s' % path
+ except Exception, e:
+ luci_log.debug('NLO: node_leave_cluster err: %s' % str(e))
+ return None
+
+ objname = str(nodename_resolved + "____flag")
+ fnpresent = noNodeFlagsPresent(self, nodefolder, objname, nodename_resolved)
+
+ if fnpresent is None:
+ luci_log.debug('NL1: An error occurred while checking flags for %s' \
+ % nodename_resolved)
+ return None
+
+ if fnpresent == False:
+ luci_log.debug('NL2: flags are still present for %s -- bailing out' \
+ % nodename_resolved)
+ return None
+
+ batch_number, result = nodeLeaveCluster(rc)
+ if batch_number is None or result is None:
+ luci_log.debug_verbose('NL3: nodeLeaveCluster error: batch_number and/or result is None')
+ return None
+
+ try:
+ set_node_flag(self, clustername, rc.hostname(), batch_number, NODE_LEAVE_CLUSTER, "Node \'%s\' leaving cluster" % nodename_resolved)
+ except Exception, e:
+ luci_log.debug_verbose('NL4: failed to set flags: %s' % str(e))
+ return True
+
+def nodeJoin(self, rc, clustername, nodename_resolved):
+ batch_number, result = nodeJoinCluster(rc)
+ if batch_number is None or result is None:
+ luci_log.debug_verbose('NJ0: batch_number and/or result is None')
+ return None
+
+ try:
+ set_node_flag(self, clustername, rc.hostname(), batch_number, NODE_JOIN_CLUSTER, "Node \'%s\' joining cluster" % nodename_resolved)
+ except Exception, e:
+ luci_log.debug_verbose('NJ1: failed to set flags: %s' % str(e))
+ return True
+
+def clusterStart(self, model):
+ if model is None:
+ return None
+
+ clustername = model.getClusterName()
+ nodes = model.getNodes()
+ if not nodes or len(nodes) < 1:
+ return None
+
+ errors = 0
+ for node in nodes:
+ nodename = node.getName().strip()
+ nodename_resolved = resolve_nodename(self, clustername, nodename)
+
+ try:
+ rc = RicciCommunicator(nodename_resolved)
+ except Exception, e:
+ luci_log.debug_verbose('CStart: RC %s: %s' \
+ % (nodename_resolved, str(e)))
+ errors += 1
+ continue
+ if nodeJoin(self, rc, clustername, nodename_resolved) is None:
+ luci_log.debug_verbose('CStart1: nodeLeave %s' % nodename_resolved)
+ errors += 1
+
+ return errors
+
+def clusterStop(self, model, delete=False):
+ if model is None:
+ return None
+
+ clustername = model.getClusterName()
+ nodes = model.getNodes()
+ if not nodes or len(nodes) < 1:
+ return None
+
+ errors = 0
+ for node in nodes:
+ nodename = node.getName().strip()
+ nodename_resolved = resolve_nodename(self, clustername, nodename)
+
+ try:
+ rc = RicciCommunicator(nodename_resolved)
+ except Exception, e:
+ luci_log.debug_verbose('[%d] CStop0: RC %s: %s' \
+ % (delete, nodename_resolved, str(e)))
+ errors += 1
+ continue
+ if nodeLeave(self, rc, clustername, nodename_resolved) is None:
+ luci_log.debug_verbose('[%d] CStop1: nodeLeave %s' \
+ % (delete, nodename_resolved))
+ errors += 1
+ return errors
+
+def clusterRestart(self, model):
+ snum_err = clusterStop(self, model)
+ if snum_err:
+ luci_log.debug_verbose('cluRestart0: clusterStop: %d errs' % snum_err)
+ jnum_err = clusterStart(self, model)
+ if jnum_err:
+ luci_log.debug_verbose('cluRestart0: clusterStart: %d errs' % jnum_err)
+ return snum_err + jnum_err
+
+def clusterDelete(self, model):
+ return clusterStop(self, model, delete=True)
+
+def forceNodeReboot(self, rc, clustername, nodename_resolved):
+ batch_number, result = nodeReboot(rc)
+ if batch_number is None or result is None:
+ luci_log.debug_verbose('FNR0: batch_number and/or result is None')
+ return None
+
+ try:
+ set_node_flag(self, clustername, rc.hostname(), batch_number, NODE_REBOOT, "Node \'%s\' is being rebooted" % nodename_resolved)
+ except Exception, e:
+ luci_log.debug_verbose('FNR1: failed to set flags: %s' % str(e))
+ return True
+
+def forceNodeFence(self, clustername, nodename, nodename_resolved):
+ path = str(CLUSTER_FOLDER_PATH + clustername)
+
+ try:
+ clusterfolder = self.restrictedTraverse(path)
+ if not clusterfolder:
+ raise Exception, 'no cluster folder at %s' % path
+ except Exception, e:
+ luci_log.debug('FNF0: The cluster folder %s could not be found: %s' \
+ % (clustername, str(e)))
+ return None
+
+ try:
+ nodes = clusterfolder.objectItems('Folder')
+ if not nodes or len(nodes) < 1:
+ raise Exception, 'no cluster nodes'
+ except Exception, e:
+ luci_log.debug('FNF1: No cluster nodes for %s were found: %s' \
+ % (clustername, str(e)))
+ return None
+
+ found_one = False
+ for node in nodes:
+ if node[1].getId().find(nodename) != (-1):
+ continue
+
+ try:
+ rc = RicciCommunicator(node[1].getId())
+ if not rc:
+ raise Exception, 'rc is None'
+ except Exception, e:
+ luci_log.debug('FNF2: ricci error for host %s: %s' \
+ % (node[0], str(e)))
+ continue
+
+ if not rc.authed():
+ rc = None
+ try:
+ snode = getStorageNode(self, node[1].getId())
+ setNodeFlag(snode, CLUSTER_NODE_NEED_AUTH)
+ except:
+ pass
+
+ try:
+ setNodeFlag(node[1], CLUSTER_NODE_NEED_AUTH)
+ except:
+ pass
+
+ continue
+ found_one = True
+ break
+
+ if not found_one:
+ return None
+
+ batch_number, result = nodeFence(rc, nodename)
+ if batch_number is None or result is None:
+ luci_log.debug_verbose('FNF3: batch_number and/or result is None')
+ return None
+
+ try:
+ set_node_flag(self, clustername, rc.hostname(), batch_number, NODE_FENCE, "Node \'%s\' is being fenced" % nodename_resolved)
+ except Exception, e:
+ luci_log.debug_verbose('FNF4: failed to set flags: %s' % str(e))
+ return True
+
+def nodeDelete(self, rc, model, clustername, nodename, nodename_resolved):
+ #We need to get a node name other than the node
+ #to be deleted, then delete the node from the cluster.conf
+ #and propogate it. We will need two ricci agents for this task.
+
+ # Make sure we can find a second node before we hose anything.
+ path = str(CLUSTER_FOLDER_PATH + clustername)
+ try:
+ clusterfolder = self.restrictedTraverse(path)
+ if not clusterfolder:
+ raise Exception, 'no cluster folder at %s' % path
+ except Exception, e:
+ luci_log.debug_verbose('ND0: node delete error for cluster %s: %s' \
+ % (clustername, str(e)))
+ return None
+
+ try:
+ nodes = clusterfolder.objectItems('Folder')
+ if not nodes or len(nodes) < 1:
+ raise Exception, 'no cluster nodes in DB'
+ except Exception, e:
+ luci_log.debug_verbose('ND1: node delete error for cluster %s: %s' \
+ % (clustername, str(e)))
+
+ found_one = False
+ for node in nodes:
+ if node[1].getId().find(nodename) != (-1):
+ continue
+ #here we make certain the node is up...
+ # XXX- we should also make certain this host is still
+ # in the cluster we believe it is.
+ try:
+ rc2 = RicciCommunicator(node[1].getId())
+ except Exception, e:
+ luci_log.info('ND2: ricci %s error: %s' % (node[0], str(e)))
+ continue
+
+ if not rc2.authed():
+ try:
+ setNodeFlag(node[1], CLUSTER_NODE_NEED_AUTH)
+ except:
+ pass
+
+ try:
+ snode = getStorageNode(self, node[0])
+ setNodeFlag(snode, CLUSTER_NODE_NEED_AUTH)
+ except:
+ pass
+
+ luci_log.debug_verbose('ND3: %s is not authed' % node[0])
+ rc2 = None
+ continue
+ else:
+ found_one = True
+ break
+
+ if not found_one:
+ luci_log.debug_verbose('ND4: unable to find ricci agent to delete %s from %s' % (nodename, clustername))
+ return None
+
+ #First, delete cluster.conf from node to be deleted.
+ #next, have node leave cluster.
+ batch_number, result = nodeLeaveCluster(rc, purge=True)
+ if batch_number is None or result is None:
+ luci_log.debug_verbose('ND5: batch_number and/or result is None')
+ return None
+
+ #It is not worth flagging this node in DB, as we are going
+ #to delete it anyway. Now, we need to delete node from model
+ #and send out new cluster.conf
+ delete_target = None
+ nodelist = model.getNodes()
+ find_node = lower(nodename)
+ for n in nodelist:
+ try:
+ if lower(n.getName()) == find_node:
+ delete_target = n
+ break
+ except:
+ continue
+
+ if delete_target is None:
+ luci_log.debug_verbose('ND6: unable to find delete target for %s in %s' \
+ % (nodename, clustername))
+ return None
+
+ model.deleteNode(delete_target)
+
+ try:
+ str_buf = model.exportModelAsString()
+ if not str_buf:
+ raise Exception, 'model string is blank'
+ except Exception, e:
+ luci_log.debug_verbose('ND7: exportModelAsString: %s' % str(e))
+ return None
+
+ # propagate the new cluster.conf via the second node
+ batch_number, result = setClusterConf(rc2, str(str_buf))
+ if batch_number is None:
+ luci_log.debug_verbose('ND8: batch number is None after del node in NTP')
+ return None
+
+ #Now we need to delete the node from the DB
+ path = str(CLUSTER_FOLDER_PATH + clustername)
+ del_path = str(path + '/' + nodename_resolved)
+
+ try:
+ delnode = self.restrictedTraverse(del_path)
+ clusterfolder = self.restrictedTraverse(path)
+ clusterfolder.manage_delObjects(delnode[0])
+ except Exception, e:
+ luci_log.debug_verbose('ND9: error deleting %s: %s' \
+ % (del_path, str(e)))
+
+ try:
+ set_node_flag(self, clustername, rc2.hostname(), batch_number, NODE_DELETE, "Deleting node \'%s\'" % nodename_resolved)
+ except Exception, e:
+ luci_log.debug_verbose('ND10: failed to set flags: %s' % str(e))
+ return True
+
def nodeTaskProcess(self, model, request):
try:
clustername = request['clustername']
@@ -2345,312 +2657,41 @@
return None
if task == NODE_LEAVE_CLUSTER:
- path = str(CLUSTER_FOLDER_PATH + clustername + "/" + nodename_resolved)
-
- try:
- nodefolder = self.restrictedTraverse(path)
- if not nodefolder:
- raise Exception, 'cannot find directory at %s' % path
- except Exception, e:
- luci_log.debug('node_leave_cluster err: %s' % str(e))
- return None
-
- objname = str(nodename_resolved + "____flag")
-
- fnpresent = noNodeFlagsPresent(self, nodefolder, objname, nodename_resolved)
- if fnpresent is None:
- luci_log.debug('An error occurred while checking flags for %s' \
- % nodename_resolved)
+ if nodeLeave(self, rc, clustername, nodename_resolved) is None:
+ luci_log.debug_verbose('NTP: nodeLeave failed')
return None
- if fnpresent == False:
- luci_log.debug('flags are still present for %s -- bailing out' \
- % nodename_resolved)
- return None
-
- batch_number, result = nodeLeaveCluster(rc)
- if batch_number is None or result is None:
- luci_log.debug_verbose('nodeLeaveCluster error: batch_number and/or result is None')
- return None
-
- batch_id = str(batch_number)
- objpath = str(path + "/" + objname)
-
- try:
- nodefolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname)
- #Now we need to annotate the new DB object
- flag = self.restrictedTraverse(objpath)
- flag.manage_addProperty(BATCH_ID, batch_id, "string")
- flag.manage_addProperty(TASKTYPE, NODE_LEAVE_CLUSTER, "string")
- flag.manage_addProperty(FLAG_DESC, "Node \'" + nodename + "\' leaving cluster", "string")
- except:
- luci_log.debug('An error occurred while setting flag %s' % objpath)
-
- response = request.RESPONSE
#Is this correct? Should we re-direct to the cluster page?
+ response = request.RESPONSE
response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clustername)
elif task == NODE_JOIN_CLUSTER:
- batch_number, result = nodeJoinCluster(rc)
- if batch_number is None or result is None:
- luci_log.debug_verbose('nodeJoin error: batch_number and/or result is None')
+ if nodeJoin(self, rc, clustername, nodename_resolved) is None:
+ luci_log.debug_verbose('NTP: nodeJoin failed')
return None
- path = str(CLUSTER_FOLDER_PATH + clustername + "/" + nodename_resolved)
- batch_id = str(batch_number)
- objname = str(nodename_resolved + "____flag")
- objpath = str(path + "/" + objname)
-
- try:
- nodefolder = self.restrictedTraverse(path)
- nodefolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname)
- #Now we need to annotate the new DB object
- flag = self.restrictedTraverse(objpath)
- flag.manage_addProperty(BATCH_ID, batch_id, "string")
- flag.manage_addProperty(TASKTYPE, NODE_JOIN_CLUSTER, "string")
- flag.manage_addProperty(FLAG_DESC, "Node \'" + nodename + "\' joining cluster", "string")
- except Exception, e:
- luci_log.debug_verbose('nodeJoin error: creating flags at %s: %s' \
- % (path, str(e)))
-
- response = request.RESPONSE
#Once again, is this correct? Should we re-direct to the cluster page?
+ response = request.RESPONSE
response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clustername)
elif task == NODE_REBOOT:
- batch_number, result = nodeReboot(rc)
- if batch_number is None or result is None:
- luci_log.debug_verbose('nodeReboot: batch_number and/or result is None')
+ if forceNodeReboot(self, rc, clustername, nodename_resolved) is None:
+ luci_log.debug_verbose('NTP: nodeReboot failed')
return None
- path = str(CLUSTER_FOLDER_PATH + clustername + "/" + nodename_resolved)
- batch_id = str(batch_number)
- objname = str(nodename_resolved + "____flag")
- objpath = str(path + "/" + objname)
-
- try:
- nodefolder = self.restrictedTraverse(path)
- nodefolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname)
- #Now we need to annotate the new DB object
- flag = self.restrictedTraverse(objpath)
- flag.manage_addProperty(BATCH_ID, batch_id, "string")
- flag.manage_addProperty(TASKTYPE, NODE_REBOOT, "string")
- flag.manage_addProperty(FLAG_DESC, "Node \'" + nodename + "\' is being rebooted", "string")
- except Exception, e:
- luci_log.debug_verbose('nodeReboot err: creating flags at %s: %s' \
- % (path, str(e)))
-
- response = request.RESPONSE
#Once again, is this correct? Should we re-direct to the cluster page?
+ response = request.RESPONSE
response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clustername)
elif task == NODE_FENCE:
- #here, we DON'T want to open connection to node to be fenced.
- path = str(CLUSTER_FOLDER_PATH + clustername)
- try:
- clusterfolder = self.restrictedTraverse(path)
- if not clusterfolder:
- raise Exception, 'no cluster folder at %s' % path
- except Exception, e:
- luci_log.debug('The cluster folder for %s could not be found: %s' \
- % (clustername, str(e)))
+ if forceNodeFence(self, clustername, nodename, nodename_resolved) is None:
+ luci_log.debug_verbose('NTP: nodeFencefailed')
return None
- try:
- nodes = clusterfolder.objectItems('Folder')
- if not nodes or len(nodes) < 1:
- raise Exception, 'no cluster nodes'
- except Exception, e:
- luci_log.debug('No cluster nodes for %s were found: %s' \
- % (clustername, str(e)))
- return None
-
- found_one = False
- for node in nodes:
- if node[1].getId().find(nodename) != (-1):
- continue
-
- try:
- rc = RicciCommunicator(node[1].getId())
- if not rc:
- raise Exception, 'rc is None'
- except Exception, e:
- luci_log.debug('ricci error for host %s: %s' \
- % (node[0], str(e)))
- continue
-
- if not rc.authed():
- rc = None
- try:
- snode = getStorageNode(self, node[1].getId())
- setNodeFlag(snode, CLUSTER_NODE_NEED_AUTH)
- except:
- pass
-
- try:
- setNodeFlag(node[1], CLUSTER_NODE_NEED_AUTH)
- except:
- pass
-
- continue
- found_one = True
- break
-
- if not found_one:
- return None
-
- batch_number, result = nodeFence(rc, nodename)
- if batch_number is None or result is None:
- luci_log.debug_verbose('nodeFence: batch_number and/or result is None')
- return None
-
- path = str(path + "/" + nodename_resolved)
- batch_id = str(batch_number)
- objname = str(nodename_resolved + "____flag")
- objpath = str(path + "/" + objname)
-
- try:
- nodefolder = self.restrictedTraverse(path)
- nodefolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname)
- #Now we need to annotate the new DB object
- flag = self.restrictedTraverse(objpath)
- flag.manage_addProperty(BATCH_ID, batch_id, "string")
- flag.manage_addProperty(TASKTYPE, NODE_FENCE, "string")
- flag.manage_addProperty(FLAG_DESC, "Node \'" + nodename + "\' is being fenced", "string")
- except Exception, e:
- luci_log.debug_verbose('nodeFence err: creating flags at %s: %s' \
- % (path, str(e)))
-
- response = request.RESPONSE
#Once again, is this correct? Should we re-direct to the cluster page?
+ response = request.RESPONSE
response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clustername)
elif task == NODE_DELETE:
- #We need to get a node name other than the node
- #to be deleted, then delete the node from the cluster.conf
- #and propogate it. We will need two ricci agents for this task.
-
- # Make sure we can find a second node before we hose anything.
- path = str(CLUSTER_FOLDER_PATH + clustername)
- try:
- clusterfolder = self.restrictedTraverse(path)
- if not clusterfolder:
- raise Exception, 'no cluster folder at %s' % path
- except Exception, e:
- luci_log.debug_verbose('node delete error for cluster %s: %s' \
- % (clustername, str(e)))
- return None
-
- try:
- nodes = clusterfolder.objectItems('Folder')
- if not nodes or len(nodes) < 1:
- raise Exception, 'no cluster nodes in DB'
- except Exception, e:
- luci_log.debug_verbose('node delete error for cluster %s: %s' \
- % (clustername, str(e)))
-
- found_one = False
- for node in nodes:
- if node[1].getId().find(nodename) != (-1):
- continue
- #here we make certain the node is up...
- # XXX- we should also make certain this host is still
- # in the cluster we believe it is.
- try:
- rc2 = RicciCommunicator(node[1].getId())
- except Exception, e:
- luci_log.info('ricci %s error: %s' % (node[0], str(e)))
- continue
- except:
- continue
-
- if not rc2.authed():
- try:
- setNodeFlag(node[1], CLUSTER_NODE_NEED_AUTH)
- except:
- pass
-
- try:
- snode = getStorageNode(self, node[0])
- setNodeFlag(snode, CLUSTER_NODE_NEED_AUTH)
- except:
- pass
-
- luci_log.debug_verbose('%s is not authed' % node[0])
- rc2 = None
- continue
- else:
- found_one = True
- break
-
- if not found_one:
- luci_log.debug_verbose('unable to find ricci node to delete %s from %s' % (nodename, clustername))
+ if nodeDelete(self, rc, model, clustername, nodename, nodename_resolved) is None:
+ luci_log.debug_verbose('NTP: nodeDelete failed')
return None
-
- #First, delete cluster.conf from node to be deleted.
- #next, have node leave cluster.
- batch_number, result = nodeLeaveCluster(rc, purge=True)
- if batch_number is None or result is None:
- luci_log.debug_verbose('nodeDelete: batch_number and/or result is None')
- return None
-
- #It is not worth flagging this node in DB, as we are going
- #to delete it anyway. Now, we need to delete node from model
- #and send out new cluster.conf
- delete_target = None
- nodelist = model.getNodes()
- find_node = lower(nodename)
- for n in nodelist:
- try:
- if lower(n.getName()) == find_node:
- delete_target = n
- break
- except:
- continue
-
- if delete_target is None:
- luci_log.debug_verbose('unable to find delete target for %s in %s' \
- % (nodename, clustername))
- return None
-
- model.deleteNode(delete_target)
-
- try:
- str_buf = model.exportModelAsString()
- if not str_buf:
- raise Exception, 'model string is blank'
- except Exception, e:
- luci_log.debug_verbose('NTP exportModelAsString: %s' % str(e))
- return None
-
- # propagate the new cluster.conf via the second node
- batch_number, result = setClusterConf(rc2, str(str_buf))
- if batch_number is None:
- luci_log.debug_verbose('batch number is None after del node in NTP')
- return None
-
- #Now we need to delete the node from the DB
- path = str(CLUSTER_FOLDER_PATH + clustername)
- del_path = str(path + "/" + nodename_resolved)
-
- try:
- delnode = self.restrictedTraverse(del_path)
- clusterfolder = self.restrictedTraverse(path)
- clusterfolder.manage_delObjects(delnode[0])
- except Exception, e:
- luci_log.debug_verbose('error deleting %s: %s' % (del_path, str(e)))
-
- batch_id = str(batch_number)
- objname = str(nodename_resolved + "____flag")
- objpath = str(path + "/" + objname)
-
- try:
- clusterfolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname)
- #Now we need to annotate the new DB object
- flag = self.restrictedTraverse(objpath)
- flag.manage_addProperty(BATCH_ID, batch_id, "string")
- flag.manage_addProperty(TASKTYPE, NODE_DELETE, "string")
- flag.manage_addProperty(FLAG_DESC, "Deleting node \'" + nodename + "\'", "string")
- except Exception, e:
- luci_log.debug_verbose('nodeDelete %s err setting flag at %s: %s' \
- % (nodename, objpath, str(e)))
-
response = request.RESPONSE
response.redirect(request['HTTP_REFERER'] + "&busyfirst=true")
@@ -2951,7 +2992,8 @@
except:
fd = None #Set to None in case last time thru loop
continue
- if fd != None:
+
+ if fd is not None:
if fd.isShared() == False: #Not a shared dev...build struct and add
fencedev = {}
fencedev['prettyname'] = FENCE_OPTS[fd.getAgentType()]
@@ -2974,7 +3016,7 @@
last_kid_fd = None
level1.append(fencedev)
else: #This dev is shared
- if (last_kid_fd != None) and (fd.getName().strip() == last_kid_fd.getName().strip()): #just append a new instance struct to last_kid_fd
+ if (last_kid_fd is not None) and (fd.getName().strip() == last_kid_fd.getName().strip()): #just append a new instance struct to last_kid_fd
instance_struct = {}
instance_struct['id'] = str(minor_num)
minor_num = minor_num + 1
@@ -3045,7 +3087,7 @@
except:
fd = None #Set to None in case last time thru loop
continue
- if fd != None:
+ if fd is not None:
if fd.isShared() == False: #Not a shared dev...build struct and add
fencedev = {}
fencedev['prettyname'] = FENCE_OPTS[fd.getAgentType()]
@@ -3068,7 +3110,7 @@
last_kid_fd = None
level2.append(fencedev)
else: #This dev is shared
- if (last_kid_fd != None) and (fd.getName().strip() == last_kid_fd.getName().strip()): #just append a new instance struct to last_kid_fd
+ if (last_kid_fd is not None) and (fd.getName().strip() == last_kid_fd.getName().strip()): #just append a new instance struct to last_kid_fd
instance_struct = {}
instance_struct['id'] = str(minor_num)
minor_num = minor_num + 1
@@ -3584,7 +3626,7 @@
def getResourceInfo(modelb, request):
if not modelb:
- luci_log.debug_verbose('no modelb obj in getResourceInfo')
+ luci_log.debug_verbose('GRI0: no modelb object in session')
return {}
name = None
@@ -4539,6 +4581,24 @@
modelb.setIsVirtualized(isVirtualized)
return modelb
+def getModelForCluster(self, clustername):
+ rc = getRicciAgent(self, clustername)
+ if not rc:
+ luci_log.debug_verbose('GMFC0: unable to find a ricci agent for %s' \
+ % clustername)
+ return None
+
+ try:
+ model = getModelBuilder(None, rc, rc.dom0())
+ if not model:
+ raise Exception, 'model is none'
+ except Exception, e:
+ luci_log.debug_verbose('GMFC1: unable to get model builder for %s: %s' \
+ % (clustername, str(e)))
+ return None
+
+ return model
+
def set_node_flag(self, cluname, agent, batchid, task, desc):
path = str(CLUSTER_FOLDER_PATH + cluname)
batch_id = str(batchid)
@@ -4551,7 +4611,7 @@
flag = self.restrictedTraverse(objpath)
flag.manage_addProperty(BATCH_ID, batch_id, 'string')
flag.manage_addProperty(TASKTYPE, task, 'string')
- flag.manage_addProperty(FLAG_DESC, desc)
+ flag.manage_addProperty(FLAG_DESC, desc, 'string')
except Exception, e:
errmsg = 'Error creating flag (%s,%s,%s)@%s: %s' \
% (batch_id, task, desc, objpath, str(e))
--- conga/luci/site/luci/Extensions/conga_constants.py 2006/11/06 23:55:23 1.23
+++ conga/luci/site/luci/Extensions/conga_constants.py 2006/11/09 20:32:02 1.24
@@ -43,6 +43,12 @@
FENCEDEV_CONFIG="53"
FENCEDEV="54"
+#Cluster tasks
+CLUSTER_STOP = '1000'
+CLUSTER_START = '1001'
+CLUSTER_RESTART = '1002'
+CLUSTER_DELETE = '1003'
+
#General tasks
NODE_LEAVE_CLUSTER="100"
NODE_JOIN_CLUSTER="101"
next reply other threads:[~2006-11-09 20:32 UTC|newest]
Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-11-09 20:32 rmccabe [this message]
-- strict thread matches above, loose matches on Subject: below --
2007-09-21 3:11 [Cluster-devel] conga/luci cluster/form-macros site/luci/Exten rmccabe
2007-06-19 15:54 rmccabe
2007-05-03 20:16 rmccabe
2007-03-15 16:41 rmccabe
2007-03-14 22:38 rmccabe
2007-03-14 22:37 rmccabe
2007-03-05 16:50 rmccabe
2007-03-05 16:50 rmccabe
2007-03-05 16:49 rmccabe
2007-02-15 22:44 rmccabe
2007-02-08 3:46 rmccabe
2007-02-07 17:02 rmccabe
2007-02-07 16:55 rmccabe
2007-02-02 4:34 rmccabe
2007-02-02 0:11 rmccabe
2007-02-01 20:49 rmccabe
2007-01-31 23:36 rmccabe
2007-01-31 5:26 rmccabe
2007-01-23 13:53 rmccabe
2007-01-15 18:21 rmccabe
2007-01-11 19:11 rmccabe
2007-01-10 21:40 rmccabe
2007-01-06 3:29 rmccabe
2006-12-14 23:14 rmccabe
2006-12-14 18:22 rmccabe
2006-12-11 22:42 rmccabe
2006-12-11 21:51 rmccabe
2006-12-06 22:11 rmccabe
2006-12-06 21:16 rmccabe
2006-11-13 21:40 rmccabe
2006-11-12 2:10 rmccabe
2006-11-03 22:48 rmccabe
2006-10-25 1:53 rmccabe
2006-10-25 1:11 rmccabe
2006-10-13 21:25 rmccabe
2006-08-03 18:36 shuennek
2006-07-21 14:49 rmccabe
2006-07-20 16:59 rmccabe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20061109203203.11621.qmail@sourceware.org \
--to=rmccabe@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.