From: rmccabe@sourceware.org
Date: 24 Oct 2006 16:36:24 -0000
Subject: [Cluster-devel] conga/luci/site/luci/Extensions LuciSyslog.py ...
Message-ID: <20061024163624.28087.qmail@sourceware.org>
List-Id: <cluster-devel.redhat.com>
To: cluster-devel.redhat.com
MIME-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit

CVSROOT:	/cvs/cluster
Module name:	conga
Branch:		RHEL5
Changes by:	rmccabe@sourceware.org	2006-10-24 16:36:23

Modified files:
	luci/site/luci/Extensions: LuciSyslog.py cluster_adapters.py
	                           conga_constants.py homebase_adapters.py
	                           ricci_bridge.py ricci_communicator.py

Log message:
	see bz# 211375

	These patches add verbose logging that will allow QE to debug
	defects more easily (in many places there was no useful output for
	them to check).

	Two new global variables are introduced:

		LUCI_DEBUG_MODE = 1
		LUCI_DEBUG_VERBOSITY = 2

	Both of these need to be set to 0 for GA.

	LUCI_DEBUG_VERBOSITY > 1 causes ricci XML input and output to be
	logged using syslog with facility LOG_DAEMON and severity LOG_DEBUG.
	syslogd is not configured to log this severity by default, so an
	entry along the lines of "*.debug /var/log/debug" should be added to
	syslog.conf if this facility is used.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/LuciSyslog.py.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.2&r2=1.2.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/cluster_adapters.py.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.120.2.4&r2=1.120.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/conga_constants.py.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.19&r2=1.19.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/homebase_adapters.py.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.34&r2=1.34.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/ricci_bridge.py.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.30.2.1&r2=1.30.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/ricci_communicator.py.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.9&r2=1.9.2.1

--- conga/luci/site/luci/Extensions/LuciSyslog.py	2006/10/16 04:59:52	1.2
+++ conga/luci/site/luci/Extensions/LuciSyslog.py	2006/10/24 16:36:23	1.2.2.1
@@ -1,3 +1,4 @@
+from conga_constants import LUCI_DEBUG_MODE, LUCI_DEBUG_VERBOSITY
 from syslog import openlog, closelog, syslog, \
     LOG_DAEMON, LOG_PID, LOG_NDELAY, LOG_INFO, \
     LOG_WARNING, LOG_AUTH, LOG_DEBUG
@@ -12,34 +13,55 @@
 """
 class LuciSyslog:
     def __init__(self):
+        self.__init = 0
         try:
-            openlog('luci', LOG_DAEMON, LOG_PID | LOG_NDELAY)
+            openlog('luci', LOG_PID | LOG_NDELAY, LOG_DAEMON)
+            self.__init = 1
         except:
             raise LuciSyslogError, 'unable to setup syslog functionality.'
     def info(self, msg):
+        if not self.__init:
+            return
         try:
             syslog(LOG_INFO, msg)
         except:
             raise LuciSyslogError, 'syslog info call failed'

     def warn(self, msg):
+        if not self.__init:
+            return
         try:
             syslog(LOG_WARNING, msg)
         except:
             raise LuciSyslogError, 'syslog warn call failed'

     def private(self, msg):
+        if not self.__init:
+            return
         try:
             syslog(LOG_AUTH, msg)
         except:
             raise LuciSyslogError, 'syslog private call failed'

+    def debug_verbose(self, msg):
+        if not LUCI_DEBUG_MODE or LUCI_DEBUG_VERBOSITY < 2 or not self.__init:
+            return
+        try:
+            syslog(LOG_DEBUG, msg)
+        except:
+            raise LuciSyslogError, 'syslog debug call failed'
+
     def debug(self, msg):
+        if not LUCI_DEBUG_MODE or not self.__init:
+            return
         try:
             syslog(LOG_DEBUG, msg)
         except:
             raise LuciSyslogError, 'syslog debug call failed'

     def close(self):
-        closelog()
+        try:
+            closelog()
+        except:
+            pass
--- conga/luci/site/luci/Extensions/cluster_adapters.py	2006/10/24 01:42:52	1.120.2.4
+++ conga/luci/site/luci/Extensions/cluster_adapters.py	2006/10/24 16:36:23	1.120.2.5
@@ -22,7 +22,8 @@
 from clusterOS import resolveOSType
 from GeneralError import GeneralError
 from UnknownClusterError import UnknownClusterError
-from homebase_adapters import nodeUnauth, nodeAuth, manageCluster, createClusterSystems, havePermCreateCluster, setNodeFlag, delNodeFlag, userAuthenticated
+from homebase_adapters import nodeUnauth, nodeAuth, manageCluster, createClusterSystems, havePermCreateCluster, setNodeFlag, delNodeFlag, userAuthenticated, getStorageNode, getClusterNode
+from LuciSyslog import LuciSyslogError, LuciSyslog

 #Policy for showing the cluster chooser menu:
 #1) If there are no clusters in the ManagedClusterSystems
@@ -34,6 +35,11 @@

 CLUSTER_FOLDER_PATH = '/luci/systems/cluster/'

+try:
+    luci_log = LuciSyslog()
+except LuciSyslogError, e:
+    pass
+
 def validateClusterNodes(request, sessionData, clusterName, numStorage):
     nodeList = list()
     nodeHash = {}
@@ -205,11 +211,24 @@
     batch_id_map = {}
     rc = None
     for i in nodeList:
+        success = True
         try:
             rc = RicciCommunicator(i['ricci_host'])
-            resultNode = rc.process_batch(batchNode, async=True)
-            batch_id_map[i['ricci_host']] = resultNode.getAttribute('batch_id')
+        except RicciError, e:
+            luci_log.debug('Unable to connect to the ricci agent on %s: %s' \
+                % (i['ricci_host'], str(e)))
+            success = False
         except:
+            success = False
+
+        if success == True:
+            try:
+                resultNode = rc.process_batch(batchNode, async=True)
+                batch_id_map[i['ricci_host']] = resultNode.getAttribute('batch_id')
+            except:
+                success = False
+
+        if not success:
             nodeUnauth(nodeList)
             cluster_properties['isComplete'] = False
             errors.append('An error occurred while attempting to add cluster node \"' + i['ricci_host'] + '\"')
@@ -294,6 +313,7 @@
         clusterObj = self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/' + clusterName)
         cluster_os = clusterObj.manage_getProperty('cluster_os')
         if not cluster_os:
+            luci_log.debug('The cluster OS property is missing for cluster ' + clusterName)
             raise Exception, 'no cluster OS was found.'
         try:
             if len(filter(lambda x: x['os'] != cluster_os, nodeList)) > 0:
@@ -342,17 +362,28 @@
     batch_id_map = {}
     for i in nodeList:
         clunode = nodeList[i]
+        success = True
         try:
             rc = RicciCommunicator(clunode['ricci_host'])
-            resultNode = rc.process_batch(batchNode, async=True)
-            batch_id_map[clunode['ricci_host']] = resultNode.getAttribute('batch_id')
-            messages.append('Cluster join initiated for host \"' + clunode['ricci_host'] + '\"')
         except:
+            luci_log.info('Unable to connect to the ricci daemon on host ' + clunode['ricci_host'])
+            success = False
+
+        if success:
+            try:
+                resultNode = rc.process_batch(batchNode, async=True)
+                batch_id_map[clunode['ricci_host']] = resultNode.getAttribute('batch_id')
+            except:
+                success = False
+
+        if not success:
             nodeUnauth(nodeList)
             cluster_properties['isComplete'] = False
             errors.append('An error occurred while attempting to add cluster node \"' + clunode['ricci_host'] + '\"')
             return (False, {'errors': errors, 'requestResults': cluster_properties})

+        messages.append('Cluster join initiated for host \"' + clunode['ricci_host'] + '\"')
+
     buildClusterCreateFlags(self, batch_id_map, clusterName)
     return (True, {'errors': errors, 'messages': messages})
@@ -412,6 +443,7 @@
         try:
             resObj = resourceAddHandler[res_type](self, dummy_form)
         except:
+            luci_log.debug('res type %d is invalid' % res_type)
             resObj = None

     if resObj is None:
@@ -1304,9 +1336,12 @@
     try:
         clusterfolder = self.restrictedTraverse(path)
         if not clusterfolder:
+            luci_log.debug('cluster folder %s for %s is missing.' \
+                % (path, clustername))
             raise
         nodes = clusterfolder.objectItems('Folder')
         if len(nodes) < 1:
+            luci_log.debug('no cluster nodes for %s found.' % clustername)
             return None
     except:
         return None
@@ -1324,15 +1359,15 @@

         try:
             rc = RicciCommunicator(hostname)
-            if not rc:
-                raise
-        except:
-            #raise Exception, ('unable to communicate with the ricci agent on %s', hostname)
+        except RicciError, e:
+            luci_log.debug('ricci error: %s' % str(e))
             continue

         try:
             clu_info = rc.cluster_info()
             if cluname != lower(clu_info[0]) and cluname != lower(clu_info[1]):
+                luci_log.debug('%s reports it\'s in cluster %s:%s; we expect %s' \
+                    % (hostname, clu_info[0], clu_info[1], cluname))
                 # node reports it's in a different cluster
                 raise
         except:
@@ -1340,7 +1375,9 @@
         if rc.authed():
             return rc
-        setNodeFlag(self, node[1], CLUSTER_NODE_NEED_AUTH)
+        setNodeFlag(node[1], CLUSTER_NODE_NEED_AUTH)
+
+    luci_log.debug('no ricci agent could be found for cluster %s' % cluname)
     return None

 def getRicciAgentForCluster(self, req):
@@ -1352,11 +1389,13 @@
         if not clustername:
             raise
     except:
+        luci_log.debug('no cluster name was specified in getRicciAgentForCluster')
         return None
     return getRicciAgent(self, clustername)

 def getClusterStatus(self, rc):
     clustatus_batch =''
+
     try:
         clustatuscmd_xml = minidom.parseString(clustatus_batch).firstChild
     except:
@@ -1364,6 +1403,8 @@
     try:
         ricci_xml = rc.process_batch(clustatuscmd_xml, async=False)
+    except RicciError, e:
+        luci_log.debug('ricci error: %s' % str(e))
     except:
         return {}
@@ -1968,6 +2009,7 @@
     try:
         clustername = request.form['clusterName']
     except:
+        luci_log.debug('missing cluster name for NTP')
         return None

     try:
@@ -1976,20 +2018,21 @@
         try:
             nodename = request.form['nodename']
         except:
+            luci_log.debug('missing nodename for NTP')
             return None

     try:
         task = request['task']
-        if not task:
-            raise
     except KeyError, e:
         try:
             task = request.form['task']
         except:
+            luci_log.debug('missing task for NTP')
             return None

     nodename_resolved = resolve_nodename(self, clustername, nodename)
     if not nodename_resolved or
not nodename or not task or not clustername: + luci_log.debug('resolve_nodename failed for NTP') return None if task != NODE_FENCE: @@ -1998,33 +2041,81 @@ # to be performed. try: rc = RicciCommunicator(nodename_resolved) - # XXX - check the cluster - if not rc.authed(): - # set the flag - rc = None - - if not rc: - raise + except RicciError, e: + luci_log.debug('ricci error from %s: %s' \ + % (nodename_resolved, str(e))) + return None except: return None + cluinfo = rc.cluster_info() + if not cluinfo[0] and not cluinfo[1]: + luci_log.debug('host %s not in a cluster (expected %s)' \ + % (nodename_resolved, clustername)) + return None + + cname = lower(clustername) + if cname != lower(cluinfo[0]) and cname != lower(cluinfo[1]): + luci_log.debug('host %s in unknown cluster %s:%s (expected %s)' \ + % (nodename_resolved, cluinfo[0], cluinfo[1], clustername)) + return None + + if not rc.authed(): + rc = None + try: + snode = getStorageNode(self, nodename) + setNodeFlag(snode, CLUSTER_NODE_NEED_AUTH) + except: + # we'll hit it again, and try again then + pass + + try: + cnode = getClusterNode(self, nodename, clustername) + setNodeFlag(cnode, CLUSTER_NODE_NEED_AUTH) + except: + # we'll hit it again, and try again then + pass + + if rc is None: + return None + if task == NODE_LEAVE_CLUSTER: - batch_number, result = nodeLeaveCluster(rc) + path = str(CLUSTER_FOLDER_PATH + clustername + "/" + nodename_resolved) - path = CLUSTER_FOLDER_PATH + clustername + "/" + nodename_resolved - nodefolder = self.restrictedTraverse(path) + try: + nodefolder = self.restrictedTraverse(path) + if not nodefolder: + raise Exception, 'cannot find directory at %s' % path + except Exception, e: + luci_log.debug('node_leave_cluster err: %s' % str(e)) + return None + + objname = str(nodename_resolved + "____flag") + + fnpresent = noNodeFlagsPresent(self, nodefolder, objname, nodename_resolved) + if fnpresent is None: + luci_log.debug('An error occurred while checking flags for %s' \ + % nodename_resolved) + return None + + if fnpresent == False: + luci_log.debug('flags are still present for %s -- bailing out' \ + % nodename_resolved) + return None + + batch_number, result = nodeLeaveCluster(rc) batch_id = str(batch_number) - objname = nodename_resolved + "____flag" - if noNodeFlagsPresent(self, nodefolder, objname, nodename_resolved) == False: - raise UnknownClusterError("Fatal", "An unfinished task flag exists for node %s" % nodename) - nodefolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname) - #Now we need to annotate the new DB object - objpath = path + "/" + objname - flag = self.restrictedTraverse(objpath) - flag.manage_addProperty(BATCH_ID,batch_id, "string") - flag.manage_addProperty(TASKTYPE,NODE_LEAVE_CLUSTER, "string") - flag.manage_addProperty(FLAG_DESC,"Node \'" + nodename + "\' leaving cluster", "string") + objpath = str(path + "/" + objname) + try: + nodefolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname) + #Now we need to annotate the new DB object + flag = self.restrictedTraverse(objpath) + flag.manage_addProperty(BATCH_ID, batch_id, "string") + flag.manage_addProperty(TASKTYPE,NODE_LEAVE_CLUSTER, "string") + flag.manage_addProperty(FLAG_DESC,"Node \'" + nodename + "\' leaving cluster", "string") + except: + luci_log.debug('An error occurred while setting flag %s' % objpath) response = request.RESPONSE #Is this correct? Should we re-direct to the cluster page? 
@@ -2056,40 +2147,64 @@ #Now we need to annotate the new DB object objpath = path + "/" + objname flag = self.restrictedTraverse(objpath) - flag.manage_addProperty(BATCH_ID,batch_id, "string") - flag.manage_addProperty(TASKTYPE,NODE_REBOOT, "string") - flag.manage_addProperty(FLAG_DESC,"Node \'" + nodename + "\' is being rebooted", "string") + flag.manage_addProperty(BATCH_ID, batch_id, "string") + flag.manage_addProperty(TASKTYPE, NODE_REBOOT, "string") + flag.manage_addProperty(FLAG_DESC, "Node \'" + nodename + "\' is being rebooted", "string") response = request.RESPONSE #Once again, is this correct? Should we re-direct to the cluster page? response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clustername) elif task == NODE_FENCE: #here, we DON'T want to open connection to node to be fenced. - path = CLUSTER_FOLDER_PATH + clustername + path = str(CLUSTER_FOLDER_PATH + clustername) try: clusterfolder = self.restrictedTraverse(path) if not clusterfolder: raise except: + luci_log.debug('The cluster folder for %s could not be found.' \ + % clustername) + return None + + try: + nodes = clusterfolder.objectItems('Folder') + except: + luci_log.debug('No cluster nodes for %s were found' % clustername) return None - nodes = clusterfolder.objectItems('Folder') found_one = False for node in nodes: - if node[1].getID().find(nodename) != (-1): + if node[1].getId().find(nodename) != (-1): continue try: rc = RicciCommunicator(node[1].getId()) - if not rc.authed(): - # set the node flag - rc = None if not rc: - raise - found_one = True - break + continue + except RicciError, e: + luci_log.debug('ricci error for host %s: %s' \ + % (node[0], str(e))) + continue except: continue + + if not rc.authed(): + rc = None + try: + snode = getStorageNode(self, node[1].getId()) + setNodeFlag(snode, CLUSTER_NODE_NEED_AUTH) + except: + pass + + try: + setNodeFlag(node[1], CLUSTER_NODE_NEED_AUTH) + except: + pass + + continue + found_one = True + break + if not found_one: return None @@ -2149,7 +2264,7 @@ #First, delete cluster.conf from node to be deleted. #next, have node leave cluster. - batch_number, result = nodeLeaveCluster(rc) + batch_number, result = nodeLeaveCluster(rc, purge=True) #It is not worth flagging this node in DB, as we are going #to delete it anyway. Now, we need to delete node from model @@ -2501,13 +2616,21 @@ except: return "Unable to resolve node name %s to retrieve logging information" % nodename_resolved + if not rc.authed(): + try: + snode = getStorageNode(self, nodename) + setNodeFlag(snode, CLUSTER_NODE_NEED_AUTH) + except: + pass + return "Luci is not authenticated to node %s. Please reauthenticate first." 
% nodename
+
     return getNodeLogs(rc)

 def processXenVM(self, req):
     model = req.SESSION.get('model')
     isNew = False
     try:
-        xenvmname = req ['servicename']
+        xenvmname = req['servicename']
     except KeyError, e:
         isNew = True
@@ -2530,14 +2653,27 @@

 def getXenVMInfo(self, model, request):
-    try:
-        xenvmname = request['servicename']
-    except KeyError, e:
-        return {}
-
-    xenvm = model.retrieveXenVMsByName(xenvmname)
-    map = xenvm.getAttributes()
-    return map
+    try:
+        xenvmname = request['servicename']
+    except KeyError, e:
+        try:
+            xenvmname = request.form['servicename']
+        except:
+            luci_log.debug_verbose('servicename is missing from request')
+            return {}
+    except:
+        luci_log.debug_verbose('servicename is missing from request')
+        return {}
+
+    try:
+        xenvm = model.retrieveXenVMsByName(xenvmname)
+    except:
+        luci_log.debug('An error occurred while attempting to get VM %s' \
+            % xenvmname)
+        return {}
+
+    map = xenvm.getAttributes()
+    return map

 def isClusterBusy(self, req):
     items = None
@@ -2556,21 +2692,30 @@
         try:
             cluname = req.form['clusterName']
         except:
+            luci_log.debug_verbose('No cluster name -- returning empty map')
             return map

     path = CLUSTER_FOLDER_PATH + cluname
     try:
         clusterfolder = self.restrictedTraverse(str(path))
         if not clusterfolder:
-            raise
-    except:
+            raise Exception, 'clusterfolder is None'
+    except Exception, e:
+        luci_log.debug_verbose('cluster %s [%s] folder missing: %s -- returning empty map' % (cluname, path, str(e)))
         return map
+    except:
+        luci_log.debug_verbose('cluster %s [%s] folder missing: returning empty map' % (cluname, path))
+        return map

     try:
         items = clusterfolder.objectItems('ManagedSystem')
-        if len(items) == 0:
+        if not items or len(items) < 1:
             return map #This returns an empty map, and should indicate not busy
+    except Exception, e:
+        luci_log.debug('An error occurred while looking for cluster %s flags at path %s: %s' % (cluname, path, str(e)))
+        return map
     except:
+        luci_log.debug('An error occurred while looking for cluster %s flags at path %s' % (cluname, path))
         return map

     map['busy'] = "true"
@@ -2601,14 +2745,30 @@
         node_report['desc'] = item[1].getProperty(FLAG_DESC)
         batch_xml = None
         ricci = item[0].split("____") #This removes the 'flag' suffix
+
         try:
             rc = RicciCommunicator(ricci[0])
-            batch_xml = rc.batch_report(item[1].getProperty(BATCH_ID))
-            if batch_xml != None:
-                (creation_status, total) = batch_status(batch_xml)
+        except RicciError, e:
+            rc = None
+            luci_log.debug_verbose('ricci returned error in iCB for %s: %s' \
+                % (cluname, str(e)))
         except:
-            creation_status = RICCI_CONNECT_FAILURE #No contact with ricci (-1000)
-            batch_xml = "bloody_failure" #set to avoid next if statement
+            rc = None
+            luci_log.info('ricci connection failed for cluster %s' % cluname)
+
+        if rc is not None:
+            try:
+                batch_xml = rc.batch_report(item[1].getProperty(BATCH_ID))
+                if batch_xml != None:
+                    (creation_status, total) = batch_status(batch_xml)
+                else:
+                    luci_log.debug_verbose('batch report for cluster %s, item %s is None' % (cluname, item[0]))
+            except:
+                creation_status = RICCI_CONNECT_FAILURE #No contact with ricci (-1000)
+                batch_xml = "bloody_failure" #set to avoid next if statement
+        else:
+            creation_status = RICCI_CONNECT_FAILURE #No contact with ricci (-1000)
+            batch_xml = "bloody_failure" #set to avoid next if statement

         if batch_xml == None: #The job is done and gone from queue
             if redirect_message == False: #We have not displayed this message yet
@@ -2617,6 +2777,8 @@
                 node_report['errormessage'] = ""
                 nodereports.append(node_report)
                 redirect_message = True
+
+                luci_log.debug_verbose('batch job is done -- deleting %s' % item[0])
                 clusterfolder.manage_delObjects(item[0])
                 continue
@@ -2667,7 +2829,10 @@
                 node_report['statusmessage'] = "Node created successfully" + REDIRECT_MSG
                 node_report['statusindex'] = creation_status
                 nodereports.append(node_report)
-                clusterfolder.manage_delObjects(item[0])
+                try:
+                    clusterfolder.manage_delObjects(item[0])
+                except Exception, e:
+                    luci_log.info('Unable to delete %s: %s' % (item[0], str(e)))
                 continue
             else:
                 map['busy'] = "true"
@@ -2690,13 +2855,17 @@
             if finished == True:
                 node_report['desc'] = item[1].getProperty(FLAG_DESC) + REDIRECT_MSG
                 nodereports.append(node_report)
-                clusterfolder.manage_delObjects(item[0])
+                try:
+                    clusterfolder.manage_delObjects(item[0])
+                except Exception, e:
+                    luci_log.info('Unable to delete %s: %s' % (item[0], str(e)))
             else:
                 node_report = {}
                 map['busy'] = "true"
                 isBusy = True
                 node_report['desc'] = item[1].getProperty(FLAG_DESC)
                 nodereports.append(node_report)
+
     if isBusy:
         part1 = req['ACTUAL_URL']
         part2 = req['QUERY_STRING']
@@ -2716,12 +2885,14 @@

 def getClusterOS(self, rc):
     map = {}
+
     try:
         os_str = resolveOSType(rc.os())
         map['os'] = os_str
         map['isVirtualized'] = rc.dom0()
     except:
         # default to rhel5 if something crazy happened.
+        luci_log.debug('An error occurred while attempting to get OS/Virt info for %s -- defaulting to rhel5/False' % rc.hostname())
         map['os'] = 'rhel5'
         map['isVirtualized'] = False
     return map
@@ -2736,8 +2907,10 @@
         try:
             cluname = request.form['clustername']
         except:
+            luci_log.debug_verbose('getResourcesInfo missing cluster name')
             return resList
     except:
+        luci_log.debug_verbose('getResourcesInfo missing cluster name')
         return resList

     for item in modelb.getResources():
@@ -2757,8 +2930,10 @@
         try:
             name = request.form['resourcename']
         except:
+            luci_log.debug_verbose('getResourceInfo missing res name')
             return {}
     except:
+        luci_log.debug_verbose('getResourceInfo missing res name')
         return {}

     try:
@@ -2767,19 +2942,22 @@
         try:
             cluname = request.form['clustername']
         except:
+            luci_log.debug_verbose('getResourceInfo missing cluster name')
             return {}
     except:
+        luci_log.debug_verbose('getResourceInfo missing cluster name')
         return {}

     try:
         baseurl = request['URL']
     except:
+        luci_log.debug_verbose('getResourceInfo missing URL')
         return {}

     for res in modelb.getResources():
         if res.getName() == name:
-            resMap = {}
             try:
+                resMap = {}
                 resMap['name'] = res.getName()
                 resMap['type'] = res.resource_type
                 resMap['tag_name'] = res.TAG_NAME
@@ -2787,7 +2965,7 @@
                 resMap['cfgurl'] = baseurl + "?" + "clustername=" + cluname + "&resourcename=" + res.getName() + "&pagetype=" + RESOURCE_CONFIG
                 return resMap
             except:
-                return {}
+                continue

 def delResource(self, rc, request):
     errstr = 'An error occurred while attempting to set the cluster.conf'
@@ -2795,11 +2973,19 @@
     try:
         modelb = request.SESSION.get('model')
     except:
+        luci_log.debug_verbose('delResource unable to extract model from SESSION')
         return errstr

     try:
         name = request['resourcename']
     except KeyError, e:
+        try:
+            name = request.form['resourcename']
+        except:
+            luci_log.debug_verbose('delResource missing resname %s' % str(e))
+            return errstr + ': ' + str(e)
+    except:
+        luci_log.debug_verbose('delResource missing resname')
         return errstr + ': ' + str(e)

     try:
@@ -2808,6 +2994,7 @@
         try:
             clustername = request.form['clustername']
         except:
+            luci_log.debug_verbose('delResource missing cluster name')
             return errstr + ': could not determine the cluster name.'
try: @@ -2828,6 +3015,7 @@ break if not found: + luci_log.debug_verbose('delresource cant find res %s' % name) return errstr + ': the specified resource was not found.' try: @@ -2835,10 +3023,12 @@ if not conf: raise except: + luci_log.debug_verbose('exportModelAsString failed') return errstr batch_number, result = setClusterConf(str(conf)) if batch_number is None or result is None: + luci_log.debug_verbose('missing batch and/or result from setClusterConf') return errstr modelstr = "" @@ -2846,13 +3036,20 @@ clusterfolder = self.restrictedTraverse(path) batch_id = str(batch_number) objname = str(ragent) + '____flag' - clusterfolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname) - #Now we need to annotate the new DB object objpath = str(path + '/' + objname) - flag = self.restrictedTraverse(objpath) - flag.manage_addProperty(BATCH_ID, batch_id, "string") - flag.manage_addProperty(TASKTYPE, RESOURCE_REMOVE, "string") - flag.manage_addProperty(FLAG_DESC, "Removing Resource \'" + request['resourcename'] + "\'", "string") + + try: + clusterfolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname) + #Now we need to annotate the new DB object + flag = self.restrictedTraverse(objpath) + flag.manage_addProperty(BATCH_ID, batch_id, "string") + flag.manage_addProperty(TASKTYPE, RESOURCE_REMOVE, "string") + flag.manage_addProperty(FLAG_DESC, "Removing Resource \'" + request['resourcename'] + "\'", "string") + except Exception, e: + luci_log.debug('An error occurred while setting flag %s: %s' \ + % (objname, str(e))) + except: + luci_log.debug('An error occurred while setting flag %s' % objname) response = request.RESPONSE response.redirect(request['HTTP_REFERER'] + "&busyfirst=true") @@ -2860,8 +3057,8 @@ def addIp(request, form=None): if form is None: form = request.form - modelb = request.SESSION.get('model') + modelb = request.SESSION.get('model') if not modelb or not form: return None @@ -2976,6 +3173,7 @@ def addGfs(request, form=None): if form is None: form = request.form + modelb = request.SESSION.get('model') if not modelb: return None @@ -2986,13 +3184,21 @@ if not oldname: raise KeyError('oldname is blank.') res = getResourceForEdit(modelb, oldname) + if not res: + luci_log.debug('resource %s was not found for editing' % oldname) + return None except KeyError, e: + luci_log.debug('resource %s was not found for editing: %s' \ + % (oldname, str(e))) return None else: - res = apply(Clusterfs) - - if not res: - return None + try: + res = apply(Clusterfs) + if not res: + raise + except: + luci_log.debug('Error creating node Clusterfs resource') + return None # XXX: sanity check these fields try: @@ -3001,30 +3207,35 @@ raise res.attr_hash['name'] = name except: + luci_log.debug_verbose('name is missing in clusterfs res') return None try: mountpoint = form['mountpoint'].strip() res.attr_hash['mountpoint'] = mountpoint except: + luci_log.debug_verbose('mountpoint is missing in clusterfs res') return None try: device = form['device'].strip() res.attr_hash['device'] = device except: + luci_log.debug_verbose('device is missing in clusterfs res') return None try: options = form['options'].strip() res.attr_hash['options'] = options except: + luci_log.debug_verbose('options is missing in clusterfs res') return None try: fsid = form['fsid'].strip() res.attr_hash['fsid'] = fsid except: + luci_log.debug_verbose('fsid is missing in clusterfs res') return None if form.has_key('forceunmount'): @@ -3280,16 +3491,20 @@ try: mb_nodes = modelb.getNodes() if not mb_nodes or not 
len(mb_nodes):
-            raise
-    except:
-        return 'Unable to find cluster nodes for ' + clusterName
+            raise Exception, 'node list is empty'
+    except Exception, e:
+        luci_log.debug_verbose('no model builder nodes found for %s: %s' \
+            % (clusterName, str(e)))
+        return 'Unable to find cluster nodes for %s' % clusterName

     try:
         cluster_node = self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/' + clusterName)
         if not cluster_node:
-            raise
-    except:
-        return 'Unable to find an entry for ' + clusterName + ' in the Luci database.'
+            raise Exception, 'cluster node is none'
+    except Exception, e:
+        luci_log.debug('can\'t find cluster node for %s: %s'
+            % (clusterName, str(e)))
+        return 'Unable to find an entry for %s in the Luci database.' % clusterName

     try:
         db_nodes = map(lambda x: x[0], cluster_node.objectItems('Folder'))
@@ -3376,9 +3591,11 @@
     try:
         ragent = rc.hostname()
         if not ragent:
+            luci_log.debug('missing hostname')
             raise
         batch_number, result = setClusterConf(str(conf))
         if batch_number is None or result is None:
+            luci_log.debug('missing batch_number or result')
             raise
     except:
         return "Some error occurred in setClusterConf\n"
@@ -3387,17 +3604,24 @@
     clusterfolder = self.restrictedTraverse(path)
     batch_id = str(batch_number)
     objname = str(ragent + '____flag')
-    clusterfolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname)
-    #Now we need to annotate the new DB object
     objpath = str(path + '/' + objname)
-    flag = self.restrictedTraverse(objpath)
-    flag.manage_addProperty(BATCH_ID, batch_id, "string")
-    flag.manage_addProperty(TASKTYPE, RESOURCE_ADD, "string")
-    if type != 'ip':
-        flag.manage_addProperty(FLAG_DESC, "Creating New Resource \'" + request.form['resourceName'] + "\'", "string")
-    else:
-        flag.manage_addProperty(FLAG_DESC, "Creating New Resource \'" + res.attr_hash['address'] + "\'", "string")
+
+    try:
+        clusterfolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname)
+        #Now we need to annotate the new DB object
+        flag = self.restrictedTraverse(objpath)
+        flag.manage_addProperty(BATCH_ID, batch_id, "string")
+        flag.manage_addProperty(TASKTYPE, RESOURCE_ADD, "string")
+
+        if type != 'ip':
+            flag.manage_addProperty(FLAG_DESC, "Creating New Resource \'" + request.form['resourceName'] + "\'", "string")
+        else:
+            flag.manage_addProperty(FLAG_DESC, "Creating New Resource \'" + res.attr_hash['address'] + "\'", "string")
+    except Exception, e:
+        try:
+            luci_log.info('Unable to create flag %s: %s' % (objpath, str(e)))
+        except:
+            pass

     response = request.RESPONSE
     response.redirect(request['HTTP_REFERER'] + "&busyfirst=true")
@@ -3410,48 +3634,92 @@
         if res.getName() == name:
             resPtr.removeChild(res)
             return res
+
+    luci_log.debug_verbose('unable to find resource \"%s\"' % name)
     raise KeyError, name

 def appendModel(request, model):
     try:
         request.SESSION.set('model', model)
     except:
-        pass
-
-    return False
+        luci_log.debug_verbose('Appending model to request failed')
+        return False

 def resolve_nodename(self, clustername, nodename):
-    path = CLUSTER_FOLDER_PATH + clustername
-    clusterfolder = self.restrictedTraverse(path)
-    objs = clusterfolder.objectItems('Folder')
+    path = str(CLUSTER_FOLDER_PATH + clustername)
+
+    try:
+        clusterfolder = self.restrictedTraverse(path)
+        objs = clusterfolder.objectItems('Folder')
+    except Exception, e:
+        luci_log.info('resolve_nodename failed for %s/%s: %s' \
+            % (nodename, clustername, str(e)))
+
     for obj in objs:
         if obj[0].find(nodename) != (-1):
             return obj[0]
-    raise
+
+    luci_log.info('resolve_nodename failed for %s/%s' % (nodename, clustername))
+    return None
def noNodeFlagsPresent(self, nodefolder, flagname, hostname):
-    items = nodefolder.objectItems('ManagedSystem')
+    try:
+        items = nodefolder.objectItems('ManagedSystem')
+    except:
+        luci_log.debug('An error occurred while trying to list flags for cluster ' + nodefolder[0])
+        return None

     for item in items:
         if item[0] != flagname:
             continue

         #a flag already exists... try to delete it
-        rc = RicciCommunicator(hostname)
+        try:
+            rc = RicciCommunicator(hostname)
+        except RicciError, e:
+            luci_log.info('Unable to connect to the ricci daemon: %s' % str(e))
+            return None
+
+        if not rc.authed():
+            try:
+                snode = getStorageNode(self, hostname)
+                setNodeFlag(snode, CLUSTER_NODE_NEED_AUTH)
+            except:
+                pass
+
+            luci_log.info('Node %s is not authenticated' % item[0])
+            return None
+
         finished = checkBatch(rc, item[1].getProperty(BATCH_ID))
         if finished == True:
             try:
                 nodefolder.manage_delObjects(item[0])
-            except:
-                return False
+            except Exception, e:
+                luci_log.info('manage_delObjects for %s failed: %s' \
+                    % (item[0], str(e)))
+                return None
             return True
         else:
             #Not finished, so cannot remove flag
             return False
+
     return True

-def getModelBuilder(rc,isVirtualized):
-    cluster_conf_node = getClusterConf(rc)
-    modelb = ModelBuilder(0, None, None, cluster_conf_node)
+def getModelBuilder(rc, isVirtualized):
+    try:
+        cluster_conf_node = getClusterConf(rc)
+        if not cluster_conf_node:
+            raise
+    except:
+        luci_log.debug('unable to get cluster_conf_node in getModelBuilder')
+        return None
+
+    try:
+        modelb = ModelBuilder(0, None, None, cluster_conf_node)
+    except Exception, e:
+        try:
+            luci_log.debug('An error occurred while trying to get modelb for conf \"%s\": %s' % (cluster_conf_node.toxml(), str(e)))
+        except:
+            pass
+
     modelb.setIsVirtualized(isVirtualized)
     return modelb
--- conga/luci/site/luci/Extensions/conga_constants.py	2006/10/16 20:46:46	1.19
+++ conga/luci/site/luci/Extensions/conga_constants.py	2006/10/24 16:36:23	1.19.2.1
@@ -113,3 +113,6 @@
 CLUSTER_NODE_ADDED = 0x04

 PLONE_ROOT='luci'
+
+LUCI_DEBUG_MODE = 1
+LUCI_DEBUG_VERBOSITY = 2
--- conga/luci/site/luci/Extensions/homebase_adapters.py	2006/10/16 20:46:46	1.34
+++ conga/luci/site/luci/Extensions/homebase_adapters.py	2006/10/24 16:36:23	1.34.2.1
@@ -1367,7 +1367,7 @@
         pass
     return False

-def setNodeFlag(self, node, flag_mask):
+def setNodeFlag(node, flag_mask):
     try:
         flags = node.getProperty('flags')
         node.manage_changeProperties({ 'flags': flags | flag_mask })
@@ -1377,7 +1377,7 @@
     except:
         pass

-def delNodeFlag(self, node, flag_mask):
+def delNodeFlag(node, flag_mask):
     try:
         flags = node.getProperty('flags')
         if flags & flag_mask != 0:
--- conga/luci/site/luci/Extensions/ricci_bridge.py	2006/10/23 19:31:15	1.30.2.1
+++ conga/luci/site/luci/Extensions/ricci_bridge.py	2006/10/24 16:36:23	1.30.2.2
@@ -298,7 +298,7 @@
         return (None, None)
     return batchAttemptResult(doc)

-def nodeLeaveCluster(rc, cluster_shutdown=False, purge=True):
+def nodeLeaveCluster(rc, cluster_shutdown=False, purge=False):
     cshutdown = 'false'
     if cluster_shutdown == True:
         cshutdown = 'true'
--- conga/luci/site/luci/Extensions/ricci_communicator.py	2006/10/16 07:39:27	1.9
+++ conga/luci/site/luci/Extensions/ricci_communicator.py	2006/10/24 16:36:23	1.9.2.1
@@ -1,17 +1,21 @@
-
-
 from time import *
 from socket import *
 import xml
 import xml.dom
 from xml.dom import minidom
-
-
+from LuciSyslog import LuciSyslog
 from HelperFunctions import access_to_host_allowed
-
 CERTS_DIR_PATH = '/var/lib/luci/var/certs/'

+try:
+    luci_log = LuciSyslog()
+except:
+    pass
+
+class RicciError(Exception):
+    pass
+
 class RicciCommunicator:
     def __init__(self, hostname, port=11111):
         self.__hostname = hostname
@@ -21,16 +25,32 @@
         self.__cert_file = CERTS_DIR_PATH + 'cacert.pem'

         # socket
-        sock = socket(AF_INET, SOCK_STREAM)
-        sock.settimeout(2.0)
-        sock.connect((self.__hostname, self.__port))
-        self.ss = ssl(sock, self.__privkey_file, self.__cert_file)
-        sock.settimeout(600.0) # 10 minutes
-        # TODO: data transfer timeout should be much less,
-        # leave until all calls are async ricci calls
+        try:
+            sock = socket(AF_INET, SOCK_STREAM)
+            sock.settimeout(2.0)
+            sock.connect((self.__hostname, self.__port))
+        except Exception, e:
+            raise RicciError, 'Error connecting to %s:%d: %s' \
+                % (self.__hostname, self.__port, str(e))
+
+        luci_log.debug_verbose('Connected to %s:%d' \
+            % (self.__hostname, self.__port))
+
+        try:
+            self.ss = ssl(sock, self.__privkey_file, self.__cert_file)
+            # TODO: data transfer timeout should be much less,
+            # leave until all calls are async ricci calls
+            sock.settimeout(600.0) # 10 minutes
+        except Exception, e:
+            raise RicciError, 'Error setting up SSL for connection to %s: %s' \
+                % (self.__hostname, str(e))

         # receive ricci header
         hello = self.__receive()
+        try:
+            luci_log.debug_verbose('Received header from %s: \"%s\"' \
+                % (self.__hostname, hello.toxml()))
+        except:
+            pass
+
         self.__authed = hello.firstChild.getAttribute('authenticated') == 'true'
         self.__cluname = hello.firstChild.getAttribute('clustername')
         self.__clualias = hello.firstChild.getAttribute('clusteralias')
@@ -42,21 +62,35 @@

     def hostname(self):
+        luci_log.debug_verbose('[auth %d] reported hostname = %s' \
+            % (self.__authed, self.__hostname))
         return self.__hostname

     def authed(self):
+        luci_log.debug_verbose('reported authed = %d for %s' \
+            % (self.__authed, self.__hostname))
         return self.__authed

     def system_name(self):
+        luci_log.debug_verbose('[auth %d] reported system_name = %s for %s' \
+            % (self.__authed, self.__reported_hostname, self.__hostname))
         return self.__reported_hostname

     def cluster_info(self):
+        luci_log.debug_verbose('[auth %d] reported cluster_info = (%s,%s) for %s' \
+            % (self.__authed, self.__cluname, self.__clualias, self.__hostname))
         return (self.__cluname, self.__clualias)

     def os(self):
+        luci_log.debug_verbose('[auth %d] reported os = %s for %s' \
+            % (self.__authed, self.__os, self.__hostname))
         return self.__os

     def dom0(self):
+        luci_log.debug_verbose('[auth %d] reported dom0 = %s for %s' \
+            % (self.__authed, self.__dom0, self.__hostname))
         return self.__dom0

     def auth(self, password):
         if self.authed():
+            luci_log.debug_verbose('already authenticated to %s' \
+                % self.__hostname)
             return True

         # send request
@@ -71,8 +105,9 @@
         # receive response
         resp = self.__receive()
         self.__authed = resp.firstChild.getAttribute('authenticated') == 'true'
-
-        return self.authed()
+
+        luci_log.debug_verbose('auth call returning %d' % self.__authed)
+        return self.__authed

     def unauth(self):
@@ -83,15 +118,33 @@
         doc.appendChild(ricci)
         self.__send(doc)
         resp = self.__receive()
-        ret = resp.firstChild.getAttribute('success')
-        if ret != '0':
-            raise Exception(str(ret))
+
+        luci_log.debug_verbose('trying to unauthenticate to %s' \
+            % self.__hostname)
+
+        try:
+            ret = resp.firstChild.getAttribute('success')
+            luci_log.debug_verbose('unauthenticate returned %s for %s' \
+                % (ret, self.__hostname))
+            if ret != '0':
+                raise Exception, 'Invalid response'
+        except:
+            errstr = 'Error unauthenticating to host %s: %s' \
+                % (self.__hostname, str(ret))
+            luci_log.debug(errstr)
+            raise RicciError, errstr

         return True
     def process_batch(self, batch_xml, async=False):
+        try:
+            luci_log.debug_verbose('auth=%d to %s for batch %s [async=%d]' \
+                % (self.__authed, self.__hostname, batch_xml.toxml(), async))
+        except:
+            pass
+
         if not self.authed():
-            raise 'not authenticated'
+            raise RicciError, 'not authenticated to host %s' % self.__hostname

         # construct request
         doc = minidom.Document()
@@ -108,13 +161,26 @@
         ricci.appendChild(batch_xml.cloneNode(True))

         # send request
-        self.__send(doc)
-
+        try:
+            self.__send(doc)
+        except Exception, e:
+            luci_log.debug('Error sending XML \"%s\" to host %s' \
+                % (doc.toxml(), self.__hostname))
+            raise RicciError, 'Error sending XML to host %s: %s' \
+                % (self.__hostname, str(e))

         # receive response
         doc = self.__receive()
+        try:
+            luci_log.debug_verbose('received from %s XML \"%s\"' \
+                % (self.__hostname, doc.toxml()))
+        except:
+            pass
+
         if doc.firstChild.getAttribute('success') != '0':
-            raise 'ricci reported error'
+            luci_log.debug_verbose('batch command failed')
+            raise RicciError, 'The last ricci command to host %s failed' \
+                % self.__hostname

         batch_node = None
         for node in doc.firstChild.childNodes:
@@ -122,26 +188,42 @@
             if node.nodeName == 'batch':
                 batch_node = node.cloneNode(True)
         if batch_node == None:
-            raise 'missing <batch/> in ricci\'s response'
+            luci_log.debug_verbose('batch node missing <batch/>')
+            raise RicciError, 'missing <batch/> in ricci\'s response from %s' \
+                % self.__hostname

         return batch_node

     def batch_run(self, batch_str, async=True):
         try:
             batch_xml_str = '<batch>' + batch_str + '</batch>'
+            luci_log.debug_verbose('attempting batch \"%s\" for host %s' \
+                % (batch_xml_str, self.__hostname))
             batch_xml = minidom.parseString(batch_xml_str).firstChild
-        except:
-            return None
+        except Exception, e:
+            luci_log.debug('received invalid batch XML for %s: \"%s\"' \
+                % (self.__hostname, batch_xml_str))
+            raise RicciError, 'batch XML is malformed'

         try:
             ricci_xml = self.process_batch(batch_xml, async)
+            try:
+                luci_log.debug_verbose('received XML \"%s\" from host %s in response to batch command.' \
+                    % (ricci_xml.toxml(), self.__hostname))
+            except:
+                pass
         except:
+            luci_log.debug('An error occurred while trying to process the batch job: %s' % batch_xml_str)
             return None
+
         return ricci_xml

     def batch_report(self, batch_id):
+        luci_log.debug_verbose('[auth=%d] asking for batchid# %s for host %s' \
+            % (self.__authed, batch_id, self.__hostname))
+
         if not self.authed():
-            raise 'not authenticated'
+            raise RicciError, 'Not authenticated to host %s' % self.__hostname

         # construct request
         doc = minidom.Document()
@@ -153,22 +235,21 @@

         # send request
         self.__send(doc)
-
-
+
+
         # receive response
         doc = self.__receive()
         if doc.firstChild.getAttribute('success') == '12':
             return None
         if doc.firstChild.getAttribute('success') != '0':
-            raise 'ricci reported error'
-
+            raise RicciError, 'Error while retrieving batch report for batch #%s from host %s' % (batch_id, self.__hostname)

         batch_node = None
         for node in doc.firstChild.childNodes:
             if node.nodeType == xml.dom.Node.ELEMENT_NODE:
                 if node.nodeName == 'batch':
                     batch_node = node.cloneNode(True)
         if batch_node == None:
-            raise 'missing <batch/> in ricci\'s response'
+            raise RicciError, 'Missing <batch/> in ricci\'s response from host %s' % self.__hostname

         return batch_node

@@ -177,13 +258,22 @@

     def __send(self, xml_doc):
         buff = xml_doc.toxml() + '\n'
-        #print buff
         while len(buff) != 0:
-            pos = self.ss.write(buff)
+            try:
+                pos = self.ss.write(buff)
+            except Exception, e:
+                luci_log.debug('Error sending XML \"%s\" to %s' \
+                    % (buff, self.__hostname))
+                raise RicciError, 'write error while sending XML to host %s' \
+                    % self.__hostname
             buff = buff[pos:]
+
+        try:
+            luci_log.debug_verbose('Sent XML \"%s\" to host %s' \
+                % (xml_doc.toxml(), self.__hostname))
+        except:
+            pass
+
         return
-
     def __receive(self):
         doc = None
         xml_in = ''
@@ -197,18 +287,38 @@
                     doc = minidom.parseString(xml_in)
                     break
                 except:
-                    pass
-        except:
-            pass
+                    # we haven't received all of the XML data yet.
+                    continue
+        except Exception, e:
+            luci_log.debug('Error reading data from %s: %s' \
+                % (self.__hostname, str(e)))
+            raise RicciError, 'Error reading data from host %s' \
+                % self.__hostname
+
+        luci_log.debug_verbose('Received XML \"%s\" from host %s' \
+            % (xml_in, self.__hostname))
+
         try:
             if doc == None:
                 doc = minidom.parseString(xml_in)
-            if doc.firstChild.nodeName != 'ricci':
-                raise ''
-        except:
-            raise 'invalid ricci response'
+        except Exception, e:
+            luci_log.debug('Error parsing XML \"%s\": %s' \
+                % (xml_in, str(e)))
+            raise RicciError, 'Error parsing XML from host %s: %s' \
+                % (self.__hostname, str(e))
+
+        if not doc or not doc.firstChild:
+            raise RicciError, \
+                'Error: an empty response was received from host %s' \
+                % self.__hostname

-        #print doc.toxml()
+        try:
+            if doc.firstChild.nodeName != 'ricci':
+                luci_log.debug('Expecting \"ricci\" got XML \"%s\" from %s' %
+                    (xml_in, self.__hostname))
+                raise Exception, 'Expecting first XML child node to be \"ricci\"'
+        except Exception, e:
+            raise RicciError, 'Invalid XML ricci response from host %s' \
+                % self.__hostname

         return doc

@@ -220,7 +330,9 @@

     try:
         return RicciCommunicator(hostname)
-    except:
+    except Exception, e:
+        luci_log.debug('Error creating a ricci connection to %s: %s' \
+            % (hostname, str(e)))
         return None
     pass

@@ -268,7 +380,12 @@
 # module (-num) failed (next module won't be processed)
 def batch_status(batch_xml):
     if batch_xml.nodeName != 'batch':
-        raise 'not a batch'
+        try:
+            luci_log.debug('Expecting an XML batch node. 
Got \"%s\"' \ + % batch_xml.toxml()) + except: + pass + raise RicciError, 'Not an XML batch node' total = 0 last = 0 for node in batch_xml.childNodes: @@ -283,6 +400,12 @@ # failure last = last + 1 last = last - 2 * last + try: + luci_log.debug_verbose('Returning (%s, %s) for batch_status(\"%s\")' \ + % (last, total, batch_xml.toxml())) + except: + pass + return (last, total) @@ -307,7 +430,9 @@ # * error_msg: error message def extract_module_status(batch_xml, module_num=1): if batch_xml.nodeName != 'batch': - raise 'not a batch' + luci_log.debug('Expecting \"batch\" got \"%s\"' % batch_xml.toxml()) + raise RicciError, 'Invalid XML node; expecting a batch node' + c = 0 for node in batch_xml.childNodes: if node.nodeType == xml.dom.Node.ELEMENT_NODE: @@ -349,5 +474,5 @@ elif status == '5': return -103, 'module removed from schedule' - raise Exception, str('no ' + str(module_num) + 'th module in the batch, or malformed response') + raise RicciError, str('no ' + str(module_num) + 'th module in the batch, or malformed response')