From mboxrd@z Thu Jan 1 00:00:00 1970 From: rmccabe@sourceware.org Date: 1 Nov 2006 22:06:56 -0000 Subject: [Cluster-devel] conga luci/cluster/index_html luci/homebase/fo ... Message-ID: <20061101220656.11637.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: conga Branch: RHEL5 Changes by: rmccabe at sourceware.org 2006-11-01 22:06:55 Modified files: luci/cluster : index_html luci/homebase : form-macros luci/site/luci/Extensions: homebase_adapters.py ricci_bridge.py ricci_communicator.py ricci/modules/storage: LVM.cpp Log message: ref bz# 213504 Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/cluster/index_html.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.20.2.2&r2=1.20.2.3 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/homebase/form-macros.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.44.2.2&r2=1.44.2.3 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/homebase_adapters.py.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.34.2.4&r2=1.34.2.5 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/ricci_bridge.py.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.30.2.5&r2=1.30.2.6 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/ricci_communicator.py.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.9.2.2&r2=1.9.2.3 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/ricci/modules/storage/LVM.cpp.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.7&r2=1.7.2.1 --- conga/luci/cluster/index_html 2006/10/31 17:48:33 1.20.2.2 +++ conga/luci/cluster/index_html 2006/11/01 22:06:55 1.20.2.3 @@ -162,7 +162,7 @@ - --- conga/luci/homebase/form-macros 2006/10/31 19:09:25 1.44.2.2 +++ conga/luci/homebase/form-macros 2006/11/01 22:06:55 1.44.2.3 @@ -1,7 +1,7 @@ - $Id: form-macros,v 1.44.2.2 2006/10/31 19:09:25 rmccabe Exp $ + $Id: form-macros,v 1.44.2.3 2006/11/01 22:06:55 rmccabe Exp $ @@ -583,8 +583,8 @@ -
- Check if storage system passwords are identical. +
+ Check if cluster node passwords are identical.
 
@@ -606,9 +606,9 @@ tal:attributes=" id python: '__SYSTEM' + str(sysNum) + ':Addr'; name python: '__SYSTEM' + str(sysNum) + ':Addr'; - value python: node['ricci_host']; + value python: node['host']; class python: 'hbInputSys' + ('errors' in node and ' error' or ''); - disabled python: nodeAuth and 1 or 0" + disabled python: (nodeAuth and node['host'].count('.') > 0) and 1 or 0" /> --- conga/luci/site/luci/Extensions/homebase_adapters.py 2006/10/31 19:09:27 1.34.2.4 +++ conga/luci/site/luci/Extensions/homebase_adapters.py 2006/11/01 22:06:55 1.34.2.5 @@ -20,9 +20,6 @@ except LuciSyslogError, e: pass -class InCluster(Exception): - pass - def siteIsSetup(self): try: if os.path.isfile(CERTS_DIR_PATH + 'privkey.pem') and os.path.isfile(CERTS_DIR_PATH + 'cacert.pem'): @@ -145,20 +142,27 @@ pass def nodeAuth(cluster, host, passwd): + messages = list() systemName = host os_str = 'rhel5' try: rc = RicciCommunicator(host) if not rc: - raise - systemName = rc.system_name() - except: - error = 'Unable to establish a connection to the ricci agent on \"' + host + '\"' - return { 'host': host, 'ricci_host': systemName, 'errors': error, 'cur_auth': False, 'os': os_str } + luci_log.debug_verbose('nodeAuth0: rc is None') + raise Exception, 'unknown error' + except Exception, e: + try: + error = 'Ricci connection to %s failed: %s' % (host, str(e)) + except: + error = 'Ricci connection to %s failed' % host + luci_log.debug_verbose('nodeAuth1: rc failed: %s' % error) + + return { 'host': host, 'ricci_host': host, 'errors': error, 'cur_auth': False, 'os': os_str } if rc.authed(): prevAuth = True + messages.append('Luci is already authenticated to %s -- not checking password' % host) else: prevAuth = False if not passwd: @@ -166,11 +170,15 @@ else: try: rc.auth(passwd) - except: pass + except: + pass if rc.authed(): - os_str = resolveOSType(rc.os()) - if not os_str: + try: + os_str = resolveOSType(rc.os()) + if not os_str: + raise + except: os_str = "rhel5" #Backup plan in case all is almost lost... systemName = rc.system_name() @@ -192,6 +200,7 @@ def validateAddClusterInitial(self, request, must_complete=True): errors = list() + messages = list() newNodeList = list() nodeHash = {} rnodeHash = {} @@ -205,8 +214,10 @@ try: rc = RicciCommunicator(sysData[0]) - except: - return (False, { 'errors': [ 'Unable to establish a connection to the Ricci agent on \"' + sysData[0] + '\"' ] }) + if not rc: + raise Exception, 'unknown error' + except Exception, e: + return (False, { 'errors': [ 'Unable to establish a connection to the Ricci agent on %s: %s' % (sysData[0], str(e)) ] }) prevAuth = 0 if not rc.authed(): @@ -250,7 +261,7 @@ if systemName[:9] == 'localhost': systemName = sysData[0] - node = { 'host': sysData[0], 'ricci_host': systemName, 'prev_auth': prevAuth, 'cur_auth': rc.authed(), 'os': os_str } + node = { 'host': rc.hostname(), 'ricci_host': systemName, 'prev_auth': prevAuth, 'cur_auth': rc.authed(), 'os': os_str } nodeHash[sysData[0]] = node rnodeHash[systemName] = node newNodeList.append(node) @@ -262,6 +273,8 @@ for i in nodeList: node = nodeAuth(clusterName, i, passwd) + if 'messages' in node: + messages.extend(node['messages']) if node['host'] in nodeHash or node['ricci_host'] in rnodeHash: continue nodeHash[node['host']] = node @@ -290,11 +303,11 @@ 'isComplete': len(filter(dfn, newNodeList)) == 0 } - if len(errors) < 1: + if len(errors) < len(nodeList): cluster_properties['redirect'] = HOMEBASE_ADD_CLUSTER return (len(errors) < 1, - {'errors': errors, 'requestResults': cluster_properties }) + {'messages': messages, 'errors': errors, 'requestResults': cluster_properties }) def validateAddCluster(self, request, must_complete=True): errors = list() @@ -603,8 +616,11 @@ try: rc = RicciCommunicator(sysData[0]) - except: - errors.append('Unable to contact the ricci agent for ' + host) + if rc is None: + raise Exception, 'unknown error' + except Exception, e: + errors.append('Unable to contact the ricci agent for %s: %s' \ + % (sysData[0], str(e))) i += 1 continue @@ -967,8 +983,11 @@ try: rc = RicciCommunicator(host) - except: - return 'Unable to establish a connection to the ricci agent on \"' + host + '\"' + if rc is None: + raise Exception, 'unknown error' + except Exception, e: + return 'Unable to establish a connection to the ricci agent on %s: %s' \ + % (host, str(e)) try: if not rc.authed(): @@ -996,20 +1015,20 @@ try: ssystem = self.restrictedTraverse(PLONE_ROOT + '/systems/storage/') - except: - return 'Unable to create storage system \"' + host + '\"' + except Exception, e: + return 'Unable to create storage system %s: %s' % (host, str(e)) try: ssystem.manage_addFolder(host, '__luci__:system') newSystem = self.restrictedTraverse(PLONE_ROOT + '/systems/storage/' + host) - except: - return 'Unable to create storage system \"' + host + '\"' + except Exception, e: + return 'Unable to create storage system %s: %s' % (host, str(e)) try: newSystem.manage_acquiredPermissions([]) newSystem.manage_role('View', ['Access contents information','View']) - except: - return 'Unable to set permissions on new system \"' + host + '\"' + except Exception, e: + return 'Unable to set permissions on storage system %s: %s' % (host, str(e)) return None @@ -1027,7 +1046,7 @@ try: clusters = self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/') if not clusters: - raise + raise Exception, 'cannot find the cluster entry in the DB' except: nodeUnauth(nodeList) return 'Unable to create cluster \"' + clusterName + '\": the cluster directory is missing.' @@ -1044,28 +1063,29 @@ clusters.manage_addFolder(clusterName, '__luci__:cluster') newCluster = self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/' + clusterName) if not newCluster: - raise - except: + raise Exception, 'unable to find cluster folder for %s' % clusterName + except Exception, e: nodeUnauth(nodeList) - return 'Unable to create cluster \"' + clusterName + '\"' + return 'Unable to create cluster %s: %s' % (clusterName, str(e)) try: newCluster.manage_acquiredPermissions([]) newCluster.manage_role('View', ['Access Contents Information','View']) - except: + except Exception, e: nodeUnauth(nodeList) try: clusters.manage_delObjects([clusterName]) except: pass - return 'Unable to set permissions on new cluster \"' + clusterName + '\"-- Cluster creation failed' + return 'Unable to set permissions on new cluster: %s: %s' % (clusterName, str(e)) - # XXX this needs to be improved. try: cluster_os = nodeList[0]['os'] if not cluster_os: raise KeyError, 'Cluster OS is blank' except KeyError, e: + luci_log.debug_verbose('Warning adding cluster %s: %s' \ + % (clusterName, str(e))) cluster_os = 'rhel5' try: @@ -1083,23 +1103,24 @@ newCluster.manage_addFolder(host, '__luci__:csystem:' + clusterName) newSystem = self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/' + clusterName + '/' + host) if not newSystem: - raise 'not there' + raise Exception, 'unable to create cluster system DB entry' newSystem.manage_acquiredPermissions([]) newSystem.manage_role('View', [ 'Access contents information' , 'View' ]) - except: + except Exception, e: nodeUnauth(nodeList) try: clusters.manage_delObjects([clusterName]) except: pass - return 'Unable to create cluster node \"' + host + '\" for cluster \"' + clusterName + '\" -- Cluster creation failed."' + return 'Unable to create cluster node %s for cluster %s: %s' \ + % (host, clusterName, str(e)) try: ssystem = self.restrictedTraverse(PLONE_ROOT + '/systems/storage/') if not ssystem: - raise - except: - return 'The storage directory is missing.' + raise Exception, 'The storage DB entry is missing' + except Exception, e: + return 'Error adding storage node %s: %s' % (host, str(e)) # Only add storage systems if the cluster and cluster node DB # objects were added successfully. @@ -1126,7 +1147,7 @@ try: clusterObj = self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/' + clusterName) if not clusterObj: - raise + raise Exception, 'cluster %s DB entry is missing' % clusterName except: nodeUnauth(nodeList) return 'No cluster named \"' + clusterName + '\" is managed by Luci' @@ -1141,18 +1162,22 @@ clusterObj.manage_addFolder(host, '__luci__:csystem:' + clusterName) newSystem = self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/' + clusterName + '/' + host) if not newSystem: - raise + raise Exception, 'cluster node DB entry for %s disappeared from under us' % host + newSystem.manage_acquiredPermissions([]) newSystem.manage_role('View', [ 'Access contents information' , 'View' ]) - except: + except Exception, e: nodeUnauth(nodeList) - return 'Unable to create cluster node \"' + host + '\" for cluster \"' + clusterName + '\"' + return 'Unable to create cluster node %s for cluster %s: %s' \ + % (host, clusterName, str(e)) try: ssystem = self.restrictedTraverse(PLONE_ROOT + '/systems/storage/') if not ssystem: - raise - except: + raise Exception, 'storage DB entry is missing' + except Exception, e: + luci_log.debug_verbose('Error: adding storage DB node for %s: %s' \ + % (host, str(e))) return # Only add storage systems if the and cluster node DB @@ -1180,19 +1205,20 @@ try: ssystem = self.restrictedTraverse(PLONE_ROOT + '/systems/storage/') if not ssystem: - raise - except: - return 'Unable to find storage system \"' + systemName + '\"' + raise Exception, 'storage DB entry is missing' + except Exception, e: + return 'Unable to find storage system %s: %s' % (systemName, str(e)) try: rc = RicciCommunicator(systemName) - if not rc: - raise - except: + if rc is None: + raise Exception, 'unknown error' + except Exception, e: try: ssystem.manage_delObjects([systemName]) except: return 'Unable to delete the storage system \"' + systemName + '\"' + luci_log.debug_verbose('ricci error for %s: %s' % (systemName, str(e))) return # Only unauthenticate if the system isn't a member of @@ -1210,8 +1236,9 @@ try: ssystem.manage_delObjects([systemName]) - except: - return 'Unable to delete storage system \"' + systemName + '\"' + except Exception, e: + return 'Unable to delete storage system %s: %s' \ + % (systemName, str(e)) def delCluster(self, clusterName): try: @@ -1238,25 +1265,33 @@ try: rc = RicciCommunicator(systemName) rc.unauth() - except: pass - cluster.manage_delObjects([systemName]) + except Exception, e: + luci_log.debug_verbose('ricci error for %s: %s' \ + % (systemName, str(e))) + + try: + cluster.manage_delObjects([systemName]) + except Exception, e: + err_str = 'Error deleting cluster object %s: %s' % (systemName, str(e)) + luci_log.debug_verbose(err_str) + return err_str def delClusterSystems(self, clusterName): try: cluster = self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/' + clusterName) if not cluster: - raise + raise Exception, 'cluster DB entry is missing' csystems = getClusterSystems(self, clusterName) - except: + except Exception, e: + luci_log.debug_verbose('delCluSysterms: error for %s: %s' \ + % (clusterName, str(e))) return 'Unable to find any systems for cluster \"' + clusterName + '\"' errors = '' for i in csystems: - try: - delClusterSystem(self, cluster, i[0]) - except: - errors += 'Unable to delete the cluster system \"' + i[0] + '\"\n' - + err = delClusterSystem(self, cluster, i[0]) + if err: + errors += 'Unable to delete the cluster system %s: %s\n' % (i[0], err) return errors def getDefaultUser(self, request): --- conga/luci/site/luci/Extensions/ricci_bridge.py 2006/10/31 17:28:04 1.30.2.5 +++ conga/luci/site/luci/Extensions/ricci_bridge.py 2006/11/01 22:06:55 1.30.2.6 @@ -507,13 +507,22 @@ batch.appendChild(module) # temporary workaround for ricci bug - system_info = rc.system_name() - rc = RicciCommunicator(system_info) + system_info = rc.hostname() + try: + rc = RicciCommunicator(system_info) + if rc is None: + raise Exception, 'unknown error' + except Exception, e: + luci_log.debug_verbose('Ricci error while connecting to %s: %s' \ + % (system_info, str(e))) + return None # end workaround try: ret = rc.process_batch(batch) except Exception, e: + luci_log.debug_verbose('process_batch error for %s: %s' \ + % (system_info, str(e))) return None if not ret: --- conga/luci/site/luci/Extensions/ricci_communicator.py 2006/10/31 17:28:04 1.9.2.2 +++ conga/luci/site/luci/Extensions/ricci_communicator.py 2006/11/01 22:06:55 1.9.2.3 @@ -32,6 +32,10 @@ except Exception, e: raise RicciError, 'Error connecting to %s:%d: %s' \ % (self.__hostname, self.__port, str(e)) + except: + raise RicciError, 'Error connecting to %s:%d: unknown error' \ + % (self.__hostname, self.__port) + luci_log.debug_verbose('Connected to %s:%d' \ % (self.__hostname, self.__port)) try: @@ -42,6 +46,9 @@ except Exception, e: raise RicciError, 'Error setting up SSL for connection to %s: %s' \ % (self.__hostname, str(e)) + except: + raise RicciError, 'Error setting up SSL for connection to %s' \ + % self.__hostname # receive ricci header hello = self.__receive() @@ -168,6 +175,8 @@ % (doc.toxml(), self.__hostname)) raise RicciError, 'Error sending XML to host %s: %s' \ % (self.__hostname, str(e)) + except: + raise RicciError, 'Error sending XML to host %s' % self.__hostname # receive response doc = self.__receive() @@ -264,8 +273,11 @@ try: pos = self.ss.write(buff) except Exception, e: - luci_log.debug('Error sending XML \"%s\" to %s' \ - % (buff, self.__hostname)) + luci_log.debug('Error sending XML \"%s\" to %s: %s' \ + % (buff, self.__hostname, str(e))) + raise RicciError, 'write error while sending XML to host %s' \ + % self.__hostname + except: raise RicciError, 'write error while sending XML to host %s' \ % self.__hostname buff = buff[pos:] @@ -294,8 +306,9 @@ except Exception, e: luci_log.debug('Error reading data from %s: %s' \ % (self.__hostname, str(e))) - raise RicciError, 'Error reading data from host %s' \ - % self.__hostname + raise RicciError, 'Error reading data from host %s' % self.__hostname + except: + raise RicciError, 'Error reading data from host %s' % self.__hostname luci_log.debug_verbose('Received XML \"%s\" from host %s' \ % (xml_in, self.__hostname)) --- conga/ricci/modules/storage/LVM.cpp 2006/10/06 03:10:13 1.7 +++ conga/ricci/modules/storage/LVM.cpp 2006/11/01 22:06:55 1.7.2.1 @@ -78,6 +78,9 @@ static const map probe_pvs(); +static bool +cluster_quorate(); + // pvs @@ -644,11 +647,15 @@ return get_locking_type() == "2"; } -void -LVM::check_locking() +bool +cluster_quorate() { - if (get_locking_type() == "2") { - // check if quorate + bool use_magma = true; + if (access("/sbin/magma_tool", X_OK)) + use_magma = false; + + if (use_magma) { + // use magma_tool String out, err; int status; vector args; @@ -657,11 +664,63 @@ throw command_not_found_error_msg("magma_tool"); if (status) throw String("cluster tools: magma_tool errored"); - if (out.find("Quorate") == out.npos) + if (out.find("Quorate") != out.npos) + return true; + else + return false; + } else { + // use cman_tool + String cman_tool_path = "/sbin/cman_tool"; + if (access(cman_tool_path.c_str(), X_OK)) + cman_tool_path = "/usr/sbin/cman_tool"; + + String out, err; + int status; + vector args; + args.push_back("status"); + if (utils::execute(cman_tool_path, args, out, err, status)) + throw command_not_found_error_msg("cman_tool"); + if (status) + throw String("cluster tools: cman_tool errored"); + + long long quorum = -1; + long long votes = -1; + vector lines = utils::split(utils::strip(out), "\n"); + for (vector::const_iterator iter = lines.begin(); + iter != lines.end(); + iter++) { + vector words = utils::split(*iter); + if (words.size() < 2) + continue; + if (words[0] == "Quorum:") + quorum = utils::to_long(words[1]); + if (words[0] == "Total_votes:") + votes = utils::to_long(words[1]); + if (words.size() < 3) + continue; + if (words[0] == "Total" && + words[1] == "votes:") + votes = utils::to_long(words[2]); + } + + if (quorum <= 0 || + votes < 0) + throw String("Unable to retrieve cluster quorum info"); + return votes >= quorum; + } +} + +void +LVM::check_locking() +{ + if (get_locking_type() == "2") { + if (!cluster_quorate()) throw ClusterNotQuorateError(); // try to start clvmd, if not running - args.clear(); + String out, err; + int status; + vector args; args.push_back("clvmd"); args.push_back("start"); if (utils::execute("/sbin/service", args, out, err, status))