From mboxrd@z Thu Jan 1 00:00:00 1970 From: kupcevic@sourceware.org Date: 16 Nov 2006 19:35:00 -0000 Subject: [Cluster-devel] conga ./clustermon.spec.in.in ./conga.spec.in. ... Message-ID: <20061116193500.4503.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: conga Branch: RHEL5 Changes by: kupcevic at sourceware.org 2006-11-16 19:34:54 Modified files: . : clustermon.spec.in.in conga.spec.in.in luci : Makefile load_site.py pack.py luci/cluster : form-chooser form-macros index_html portlet_cluconfig resource-form-macros resource_form_handlers.js luci/homebase : form-chooser form-macros homebase_common.js homebase_portlet_fetcher index_html luci_homebase.css portlet_homebase luci/plone-custom: conga.js footer luci/site/luci/Extensions: FenceDaemon.py FenceHandler.py LuciSyslog.py cluster_adapters.py conga_constants.py homebase_adapters.py ricci_bridge.py ricci_communicator.py make : version.in ricci/modules/log: LogParser.cpp Added files: doc : config_rhel5.html luci/docs : config_rhel5 Removed files: luci/site/luci/Extensions: Quorumd.py ricci_test.py Log message: sync with HEAD, bump to version 0.8-24 Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/conga/clustermon.spec.in.in.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.18.2.3&r2=1.18.2.4 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/conga.spec.in.in.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.45.2.4&r2=1.45.2.5 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/doc/config_rhel5.html.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=NONE&r2=1.1.2.1 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/Makefile.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.20&r2=1.20.2.1 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/load_site.py.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.14&r2=1.14.2.1 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/pack.py.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.4&r2=1.4.2.1 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/cluster/form-chooser.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.12&r2=1.12.2.1 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/cluster/form-macros.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.90.2.2&r2=1.90.2.3 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/cluster/index_html.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.20.2.3&r2=1.20.2.4 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/cluster/portlet_cluconfig.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.2&r2=1.2.2.1 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/cluster/resource-form-macros.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.21.2.1&r2=1.21.2.2 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/cluster/resource_form_handlers.js.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.20.2.1&r2=1.20.2.2 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/docs/config_rhel5.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=NONE&r2=1.2.2.1 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/homebase/form-chooser.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.10&r2=1.10.2.1 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/homebase/form-macros.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.44.2.3&r2=1.44.2.4 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/homebase/homebase_common.js.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.13&r2=1.13.2.1 
http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/homebase/homebase_portlet_fetcher.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.3&r2=1.3.2.1 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/homebase/index_html.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.18.2.1&r2=1.18.2.2 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/homebase/luci_homebase.css.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.28&r2=1.28.2.1 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/homebase/portlet_homebase.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.7&r2=1.7.2.1 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/plone-custom/conga.js.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.3&r2=1.3.2.1 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/plone-custom/footer.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.2&r2=1.2.2.1 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/FenceDaemon.py.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1&r2=1.1.2.1 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/FenceHandler.py.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.4&r2=1.4.2.1 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/LuciSyslog.py.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.2.2.2&r2=1.2.2.3 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/cluster_adapters.py.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.120.2.8&r2=1.120.2.9 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/conga_constants.py.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.19.2.1&r2=1.19.2.2 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/homebase_adapters.py.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.34.2.5&r2=1.34.2.6 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/ricci_bridge.py.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.30.2.6&r2=1.30.2.7 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/ricci_communicator.py.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.9.2.3&r2=1.9.2.4 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/Quorumd.py.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1&r2=NONE http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/ricci_test.py.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.1&r2=NONE http://sourceware.org/cgi-bin/cvsweb.cgi/conga/make/version.in.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.21.2.4&r2=1.21.2.5 http://sourceware.org/cgi-bin/cvsweb.cgi/conga/ricci/modules/log/LogParser.cpp.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.6.2.1&r2=1.6.2.2 --- conga/clustermon.spec.in.in 2006/11/01 23:11:25 1.18.2.3 +++ conga/clustermon.spec.in.in 2006/11/16 19:34:52 1.18.2.4 @@ -194,6 +194,10 @@ %changelog + +* Thu Nov 16 2006 Stanko Kupcevic 0.8-24 + - version bump + * Wed Nov 01 2006 Stanko Kupcevic 0.8-23 - version bump --- conga/conga.spec.in.in 2006/11/01 23:11:25 1.45.2.4 +++ conga/conga.spec.in.in 2006/11/16 19:34:52 1.45.2.5 @@ -282,6 +282,31 @@ %changelog + +* Thu Nov 16 2006 Stanko Kupcevic 0.8-24 +- Fixed bz215039 (Cannot create a new resource via luci web app) +- Fixed bz215034 (Cannot change daemon properties via luci web app) +- Fixed bz214790 (Stop/restart cluster not working via luci web app) +- Fixed bz213690 (luci - Reversed links in colophon (gui - minor)) +- Fixed bz213266 (Conga - modifying a cluster node's cluster membership in a subnet with other clusters results in the wrong cluster.conf) +- Fixed bz213083 (luci - should display usernames in some 
logical/sorted order (usability)) +- Fixed bz212601 (luci - selecting cluster name or cluster node name indicates error in install and displays empty form) +- Fixed bz212021 (various luci buttons do nothing) +- Fixed bz212006 (create cluster does not show status as cluster is being created) +- Fixed bz212584 (luci does not retrieve failed ricci queue elements) +- Fixed bz212440 (luci persists possibly incorrect name for a system) +- Improved bz213306 (ricci - log probing can take minutes to complete) +- Fixed starting/stopping services +- Fixed deleting cluster +- Fixed deleting node +- Fixed redirection for all async->busy wait calls +- Storage module: properly probe cluster quorum if LVM locking + is marked as clustered +- Resolves: bz215039, bz215034, bz214790, bz213690, bz213266 +- Resolves: bz213083, bz212601, bz212021, bz212006, bz212584 +- Resolves: bz212440 +- Related: bz213306 + * Wed Nov 01 2006 Stanko Kupcevic 0.8-23 - 213504: luci does not correctly handle cluster.conf with nodes lacking FQDN /cvs/cluster/conga/doc/config_rhel5.html,v --> standard output revision 1.1.2.1 --- conga/doc/config_rhel5.html +++ - 2006-11-16 19:34:56.739739000 +0000 @@ -0,0 +1,260 @@ +Advanced Cluster Configuration Parameters + +

Advanced Cluster Configuration Parameters

+

+

+
secauth
+This specifies that HMAC/SHA1 authentication should be used to authenticate +all messages. It further specifies that all data should be encrypted with the +sober128 encryption algorithm to protect data from eavesdropping. +

+Enabling this option adds a 36 byte header to every message sent by totem which +reduces total throughput. Encryption and authentication consume 75% of CPU +cycles in aisexec as measured with gprof when enabled. +

+For 100mbit networks with 1500 MTU frame transmissions: +A throughput of 9mb/sec is possible with 100% cpu utilization when this +option is enabled on 3ghz cpus. +A throughput of 10mb/sec is possible with 20% cpu utilization when this +option is disabled on 3ghz cpus. +

+For gig-e networks with large frame transmissions: +A throughput of 20mb/sec is possible when this option is enabled on +3ghz cpus. +A throughput of 60mb/sec is possible when this option is disabled on +3ghz cpus. +

+The default is on. +

+

rrp_mode
+This specifies the mode of redundant ring, which may be none, active, or +passive. Active replication offers slightly lower latency from transmit +to delivery in faulty network environments but with less performance. +Passive replication may nearly double the speed of the totem protocol +if the protocol doesn't become cpu bound. The final option is none, in +which case only one network interface will be used to operate the totem +protocol. +

+If only one interface directive is specified, none is automatically chosen. +If multiple interface directives are specified, only active or passive may +be chosen. +

+

netmtu
+This specifies the network maximum transmit unit. To set this value beyond +1500, the regular frame MTU, requires ethernet devices that support large, +also called jumbo, frames. If any device in the network doesn't support large +frames, the protocol will not operate properly. The hosts must also have their +mtu size set from 1500 to whatever frame size is specified here. +

+Please note that while some NICs or switches claim large frame support, they support +9000 MTU as the maximum frame size including the IP header. Setting the netmtu +and host MTUs to 9000 will cause totem to use the full 9000 bytes of the frame. +Then Linux will add an 18 byte header moving the full frame size to 9018. As a +result some hardware will not operate properly with this size of data. A netmtu +of 8982 seems to work for the few large frame devices that have been tested. +Some manufacturers claim large frame support when in fact they support frame +sizes of 4500 bytes. +

+Increasing the MTU from 1500 to 8982 doubles throughput performance from 30MB/sec +to 60MB/sec as measured with evsbench with 175000 byte messages with the secauth +directive set to off. +

+When sending multicast traffic, if the network frequently reconfigures, chances are +that some device in the network doesn't support large frames. +

+Choose hardware carefully if intending to use large frame support. +

+The default is 1500. +

+

threads
+This directive controls how many threads are used to encrypt and send multicast +messages. If secauth is off, the protocol will never use threaded sending. +If secauth is on, this directive allows systems to be configured to use +multiple threads to encrypt and send multicast messages. +

+A thread directive of 0 indicates that no threaded send should be used. This +mode offers best performance for non-SMP systems. +

+The default is 0. +

+

vsftype
+This directive controls the virtual synchrony filter type used to identify +a primary component. The preferred choice is YKD dynamic linear voting, +however, for clusters larger than 32 nodes YKD consumes a lot of memory. For +large scale clusters that are created by changing the MAX_PROCESSORS_COUNT +#define in the C code totem.h file, the virtual synchrony filter "none" is +recommended but then AMF and DLCK services (which are currently experimental) +are not safe for use. +

+The default is ykd. The vsftype can also be set to none. +

+Within the +totem + +directive, there are several configuration options which are used to control +the operation of the protocol. It is generally not recommended to change any +of these values without proper guidance and sufficient testing. Some networks +may require larger values if suffering from frequent reconfigurations. Some +applications may require faster failure detection times which can be achieved +by reducing the token timeout. +

+

token
+This timeout specifies, in milliseconds, how long to wait without +receiving a token before a token loss is declared. This is the time spent detecting a failure of a processor +in the current configuration. Reforming a new configuration takes about 50 +milliseconds in addition to this timeout. +

+The default is 5000 milliseconds. +

+

token_retransmit
+This timeout specifies, in milliseconds, how long to wait for a token +before the token is retransmitted. This will be automatically calculated if token +is modified. It is not recommended to alter this value without guidance from +the openais community. +

+The default is 238 milliseconds. +

+

hold
+This timeout specifies in milliseconds how long the token should be held by +the representative when the protocol is under low utilization. It is not +recommended to alter this value without guidance from the openais community. +

+The default is 180 milliseconds. +

+

retransmits_before_loss
+This value identifies how many token retransmits should be attempted before +forming a new configuration. If this value is set, retransmit and hold will +be automatically calculated from retransmits_before_loss and token. +

+The default is 4 retransmissions. +

+

join
+This timeout specifies in milliseconds how long to wait for join messages in +the membership protocol. +

+The default is 100 milliseconds. +

+

send_join
+This timeout specifies in milliseconds an upper range between 0 and send_join +to wait before sending a join message. For configurations with fewer than +32 nodes, this parameter is not necessary. For larger rings, this parameter +is necessary to ensure the NIC is not overflowed with join messages on +formation of a new ring. A reasonable value for large rings (128 nodes) would +be 80msec. Other timer values must also change if this value is changed. Seek +advice from the openais mailing list if trying to run larger configurations. +

+The default is 0 milliseconds. +

+

consensus
+This timeout specifies in milliseconds how long to wait for consensus to be +achieved before starting a new round of membership configuration. +

+The default is 200 milliseconds. +

+

merge
+This timeout specifies in milliseconds how long to wait before checking for +a partition when no multicast traffic is being sent. If multicast traffic +is being sent, the merge detection happens automatically as a function of +the protocol. +

+The default is 200 milliseconds. +

+

downcheck
+This timeout specifies in milliseconds how long to wait before checking +that a network interface is back up after it has been downed. +

+The default is 1000 milliseconds. +

+

fail_to_recv_const
+This constant specifies how many rotations of the token may occur without receiving any +of the expected messages before a new +configuration is formed. +

+The default is 50 failures to receive a message. +

+

seqno_unchanged_const
+This constant specifies how many rotations of the token without any multicast +traffic should occur before the merge detection timeout is started. +

+The default is 30 rotations. +

+

heartbeat_failures_allowed
+[HeartBeating mechanism] +Configures the optional HeartBeating mechanism for faster failure detection. Keep in +mind that engaging this mechanism in lossy networks could cause faulty loss declaration +as the mechanism relies on the network for heartbeating. +

+So as a rule of thumb, use this mechanism if you require improved failure detection in low to +medium utilized networks. +

+This constant specifies the number of heartbeat failures the system should tolerate +before declaring heartbeat failure, e.g. 3. If this value is not set or is 0, then the +heartbeat mechanism is not engaged in the system and token rotation is the method +of failure detection. +

+The default is 0 (disabled). +

+

max_network_delay
+[HeartBeating mechanism] +This constant specifies in milliseconds the approximate delay that your network takes +to transport one packet from one machine to another. This value is to be set by system +engineers; please don't change it if unsure, as it affects the failure detection +mechanism using heartbeat. +

+The default is 50 milliseconds. +

+

window_size
+This constant specifies the maximum number of messages that may be sent on one +token rotation. If all processors perform equally well, this value could be +large (300), which would introduce higher latency from origination to delivery +for very large rings. To reduce latency in large rings (16+), the defaults are +a safe compromise. If 1 or more slow processor(s) are present among fast +processors, window_size should be no larger than 256000 / netmtu to avoid +overflow of the kernel receive buffers. The user is notified of this by +the display of a retransmit list in the notification logs. There is no loss +of data, but performance is reduced when these errors occur. +

+The default is 50 messages. +

+

max_messages
+This constant specifies the maximum number of messages that may be sent by one +processor on receipt of the token. The max_messages parameter is limited to +256000 / netmtu to prevent overflow of the kernel transmit buffers. +

+The default is 17 messages. +

+

rrp_problem_count_timeout
+This specifies the time in milliseconds to wait before decrementing the +problem count by 1 for a particular ring to ensure a link is not marked +faulty for transient network failures. +

+The default is 1000 milliseconds. +

+

rrp_problem_count_threshold
+This specifies the number of times a problem is detected with a link before +setting the link faulty. Once a link is set faulty, no more data is +transmitted upon it. Also, the problem counter is no longer decremented when +the problem count timeout expires. +

+A problem is detected whenever all tokens from the preceding processor have +not been received within the rrp_token_expired_timeout. The +rrp_problem_count_threshold * rrp_token_expired_timeout should be at least 50 +milliseconds less than the token timeout, or a complete reconfiguration +may occur. +

+The default is 20 problem counts. +

+

rrp_token_expired_timeout
+This specifies the time in milliseconds to increment the problem counter for +the redundant ring protocol after not having received a token from all rings +for a particular processor. +

+This value will automatically be calculated from the token timeout and +problem_count_threshold but may be overridden. It is not recommended to +override this value without guidance from the openais community. +

+The default is 47 milliseconds. +

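For orientation, here is a minimal sketch of how a few of the directives described above might be grouped in a totem stanza. The directive names follow this document and the values are simply the defaults quoted above; treat it as illustrative only and consult the openais.conf(5) man page for the authoritative names and syntax.

    totem {
        secauth: on
        threads: 0
        netmtu: 1500
        vsftype: ykd
        token: 5000
        token_retransmit: 238
        hold: 180
        join: 100
        consensus: 200
    }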
+

+ + --- conga/luci/Makefile 2006/08/09 15:52:14 1.20 +++ conga/luci/Makefile 2006/11/16 19:34:52 1.20.2.1 @@ -1,4 +1,3 @@ -# $Id: Makefile,v 1.20 2006/08/09 15:52:14 rmccabe Exp $ ZOPEINSTANCE=/var/lib/luci include ../make/version.in --- conga/luci/load_site.py 2006/09/19 15:01:20 1.14 +++ conga/luci/load_site.py 2006/11/16 19:34:52 1.14.2.1 @@ -1,5 +1,4 @@ #!/usr/bin/python -# $Id: load_site.py,v 1.14 2006/09/19 15:01:20 rmccabe Exp $ ############################################################################## # --- conga/luci/pack.py 2006/07/24 20:17:01 1.4 +++ conga/luci/pack.py 2006/11/16 19:34:52 1.4.2.1 @@ -1,5 +1,4 @@ #!/usr/bin/python -# $Id: pack.py,v 1.4 2006/07/24 20:17:01 kupcevic Exp $ import os, sys, string --- conga/luci/cluster/form-chooser 2006/10/16 20:25:33 1.12 +++ conga/luci/cluster/form-chooser 2006/11/16 19:34:52 1.12.2.1 @@ -12,7 +12,7 @@ -
+
--- conga/luci/cluster/form-macros 2006/10/31 17:28:03 1.90.2.2 +++ conga/luci/cluster/form-macros 2006/11/16 19:34:52 1.90.2.3 @@ -7,7 +7,6 @@
-

Entry Form

@@ -26,26 +25,33 @@ -

- +

+ + -

-
- +

+
+ - - - - - + + + [cluster software installed] + - + [cluster node rebooted] + + + + [cluster node configured] - - + + + [cluster node joined cluster] @@ -61,18 +67,17 @@
- + global ricci_agent python: here.getRicciAgent(clu[0])" /> -
+
An error occurred when trying to contact any of the nodes in the cluster.
- + @@ -84,15 +89,33 @@ + tal:content="cstatus/clusteralias | string:[unknown]" />
@@ -352,34 +375,41 @@ set_page_title('Luci ??? cluster ??? Configure cluster properties'); - + + + + + + +
-
+ + + + @@ -504,6 +807,10 @@ + +
@@ -570,6 +877,10 @@ tal:attributes="value request/pagetype | request/form/pagetype" /> + +
@@ -779,11 +1090,13 @@ +
- - + +

Cluster Process Form

@@ -1198,7 +1511,7 @@
ESH Path (Optional) + tal:attributes="cur_fencedev/login | string:/opt/pan-mgr/bin/esh" />
@@ -1412,7 +1725,9 @@ + +

Cluster daemons running on this node

-
+ @@ -1488,23 +1803,38 @@ - +
- +
- + + + + +
+ + + + + +

@@ -1604,10 +1934,13 @@ set_page_title('Luci ??? cluster ??? nodes'); -
+
+ + +
@@ -1743,7 +2076,7 @@ value request/form/clusterName | request/clustername | nothing" /> -

Add a node to

+

Add a node to

@@ -1808,7 +2141,10 @@
-

Node Process Form

+ +
+ +
@@ -1820,12 +2156,13 @@ set_page_title('Luci ??? cluster ??? services'); - + + +
-

Autostart is enabled for this service

+

Autostart is enabled for this service

@@ -1881,7 +2218,6 @@
-

Path to configuration file:

Name of configuration file:

@@ -1890,7 +2226,6 @@
-

Properties for Xen VM

@@ -1974,7 +2309,9 @@ + + @@ -1987,7 +2324,9 @@ + + @@ -1998,15 +2337,17 @@ set_page_title('Luci ??? cluster ??? services ??? Stop a service'); + +
- + @@ -2027,7 +2368,7 @@ - +
@@ -2041,7 +2382,9 @@ + + + +
@@ -2299,11 +2644,24 @@ set_page_title('Luci ??? cluster ??? fence devices');

Shared Fence Devices for Cluster:

- + +

Agent type:

Name:

-

Agent type:

+

Nodes using this device for fencing:

+
    + + +
  • No nodes currently employ this fence device
  • +
    + +
  • + + +
  • +
    +

--- conga/luci/cluster/index_html 2006/11/01 22:06:55 1.20.2.3 +++ conga/luci/cluster/index_html 2006/11/16 19:34:52 1.20.2.4 @@ -9,11 +9,6 @@ xml:lang language"> - - - - - @@ -29,47 +24,54 @@ - -
- - - - - - - - - - - - - - - - - - - - - - - A slot where you can insert elements in the header from a template + + + + + + + + + + + + + + + + + + + + + - - - A slot where you can insert CSS in the header from a template - - - - + + + - A slot where you can insert javascript in the header from a template - --> - - + + + + + + + + + +
@@ -214,8 +222,8 @@ - + +
@@ -238,6 +246,7 @@
+
--- conga/luci/cluster/portlet_cluconfig 2006/09/27 22:24:11 1.2 +++ conga/luci/cluster/portlet_cluconfig 2006/11/16 19:34:52 1.2.2.1 @@ -10,7 +10,7 @@
- + Clusters
@@ -36,7 +36,8 @@
- +
--- conga/luci/cluster/resource-form-macros 2006/10/31 17:28:03 1.21.2.1 +++ conga/luci/cluster/resource-form-macros 2006/11/16 19:34:53 1.21.2.2 @@ -43,8 +43,7 @@ + global rescInf python: here.getResourcesInfo(modelb, request)" />
@@ -258,44 +257,43 @@

Configure

- +
- + - +
- + - +
- + - +
- + - +
- + - +
- + - +
- + - +
- +
@@ -686,14 +684,12 @@ + checked python: nfstype != 'nfs4' and 'checked'" />NFS3
+ checked python: nfstype == 'nfs4' and 'checked'" />NFS4 --- conga/luci/cluster/resource_form_handlers.js 2006/10/31 17:28:03 1.20.2.1 +++ conga/luci/cluster/resource_form_handlers.js 2006/11/16 19:34:53 1.20.2.2 @@ -101,7 +101,7 @@ function validate_nfs_mount(form) { var errors = new Array(); - if (!form.mountpoint || str_is_blank(form.mounpoint.value)) { + if (!form.mountpoint || str_is_blank(form.mountpoint.value)) { errors.push('No mount point was given.'); set_form_err(form.mountpoint); } else /cvs/cluster/conga/luci/docs/config_rhel5,v --> standard output revision 1.2.2.1 --- conga/luci/docs/config_rhel5 +++ - 2006-11-16 19:34:57.842868000 +0000 @@ -0,0 +1,260 @@ +Advanced Cluster Configuration Parameters + +

Advanced Cluster Configuration Parameters

+

+

+
secauth
+This specifies that HMAC/SHA1 authentication should be used to authenticate +all messages. It further specifies that all data should be encrypted with the +sober128 encryption algorithm to protect data from eavesdropping. +

+Enabling this option adds a 36 byte header to every message sent by totem which +reduces total throughput. Encryption and authentication consume 75% of CPU +cycles in aisexec as measured with gprof when enabled. +

+For 100mbit networks with 1500 MTU frame transmissions: +A throughput of 9mb/sec is possible with 100% cpu utilization when this +option is enabled on 3ghz cpus. +A throughput of 10mb/sec is possible with 20% cpu utilization when this +option is disabled on 3ghz cpus. +

+For gig-e networks with large frame transmissions: +A throughput of 20mb/sec is possible when this option is enabled on +3ghz cpus. +A throughput of 60mb/sec is possible when this option is disabled on +3ghz cpus. +

+The default is on. +

+

rrp_mode
+This specifies the mode of redundant ring, which may be none, active, or +passive. Active replication offers slightly lower latency from transmit +to delivery in faulty network environments but with less performance. +Passive replication may nearly double the speed of the totem protocol +if the protocol doesn't become cpu bound. The final option is none, in +which case only one network interface will be used to operate the totem +protocol. +

+If only one interface directive is specified, none is automatically chosen. +If multiple interface directives are specified, only active or passive may +be chosen. +

+

netmtu
+This specifies the network maximum transmit unit. To set this value beyond +1500, the regular frame MTU, requires ethernet devices that support large, +also called jumbo, frames. If any device in the network doesn't support large +frames, the protocol will not operate properly. The hosts must also have their +mtu size set from 1500 to whatever frame size is specified here. +

+Please note that while some NICs or switches claim large frame support, they support +9000 MTU as the maximum frame size including the IP header. Setting the netmtu +and host MTUs to 9000 will cause totem to use the full 9000 bytes of the frame. +Then Linux will add an 18 byte header moving the full frame size to 9018. As a +result some hardware will not operate properly with this size of data. A netmtu +of 8982 seems to work for the few large frame devices that have been tested. +Some manufacturers claim large frame support when in fact they support frame +sizes of 4500 bytes. +

+Increasing the MTU from 1500 to 8982 doubles throughput performance from 30MB/sec +to 60MB/sec as measured with evsbench with 175000 byte messages with the secauth +directive set to off. +

+When sending multicast traffic, if the network frequently reconfigures, chances are +that some device in the network doesn't support large frames. +

+Choose hardware carefully if intending to use large frame support. +

+The default is 1500. +

+

threads
+This directive controls how many threads are used to encrypt and send multicast +messages. If secauth is off, the protocol will never use threaded sending. +If secauth is on, this directive allows systems to be configured to use +multiple threads to encrypt and send multicast messages. +

+A thread directive of 0 indicates that no threaded send should be used. This +mode offers best performance for non-SMP systems. +

+The default is 0. +

+

vsftype
+This directive controls the virtual synchrony filter type used to identify +a primary component. The preferred choice is YKD dynamic linear voting, +however, for clusters larger than 32 nodes YKD consumes a lot of memory. For +large scale clusters that are created by changing the MAX_PROCESSORS_COUNT +#define in the C code totem.h file, the virtual synchrony filter "none" is +recommended but then AMF and DLCK services (which are currently experimental) +are not safe for use. +

+The default is ykd. The vsftype can also be set to none. +

+Within the +totem + +directive, there are several configuration options which are used to control +the operation of the protocol. It is generally not recommended to change any +of these values without proper guidance and sufficient testing. Some networks +may require larger values if suffering from frequent reconfigurations. Some +applications may require faster failure detection times which can be achieved +by reducing the token timeout. +

+

token
+This timeout specifies, in milliseconds, how long to wait without +receiving a token before a token loss is declared. This is the time spent detecting a failure of a processor +in the current configuration. Reforming a new configuration takes about 50 +milliseconds in addition to this timeout. +

+The default is 5000 milliseconds. +

+

token_retransmit
+This timeout specifies, in milliseconds, how long to wait for a token +before the token is retransmitted. This will be automatically calculated if token +is modified. It is not recommended to alter this value without guidance from +the openais community. +

+The default is 238 milliseconds. +

+

hold
+This timeout specifies in milliseconds how long the token should be held by +the representative when the protocol is under low utilization. It is not +recommended to alter this value without guidance from the openais community. +

+The default is 180 milliseconds. +

+

retransmits_before_loss
+This value identifies how many token retransmits should be attempted before +forming a new configuration. If this value is set, retransmit and hold will +be automatically calculated from retransmits_before_loss and token. +

+The default is 4 retransmissions. +

+

join
+This timeout specifies in milliseconds how long to wait for join messages in +the membership protocol. +

+The default is 100 milliseconds. +

+

send_join
+This timeout specifies in milliseconds an upper range between 0 and send_join +to wait before sending a join message. For configurations with fewer than +32 nodes, this parameter is not necessary. For larger rings, this parameter +is necessary to ensure the NIC is not overflowed with join messages on +formation of a new ring. A reasonable value for large rings (128 nodes) would +be 80msec. Other timer values must also change if this value is changed. Seek +advice from the openais mailing list if trying to run larger configurations. +

+The default is 0 milliseconds. +

+

consensus
+This timeout specifies in milliseconds how long to wait for consensus to be +achieved before starting a new round of membership configuration. +

+The default is 200 milliseconds. +

+

merge
+This timeout specifies in milliseconds how long to wait before checking for +a partition when no multicast traffic is being sent. If multicast traffic +is being sent, the merge detection happens automatically as a function of +the protocol. +

+The default is 200 milliseconds. +

+

downcheck
+This timeout specifies in milliseconds how long to wait before checking +that a network interface is back up after it has been downed. +

+The default is 1000 milliseconds. +

+

fail_to_recv_const
+This constant specifies how many rotations of the token may occur without receiving any +of the expected messages before a new +configuration is formed. +

+The default is 50 failures to receive a message. +

+

seqno_unchanged_const
+This constant specifies how many rotations of the token without any multicast +traffic should occur before the merge detection timeout is started. +

+The default is 30 rotations. +

+

heartbeat_failures_allowed
+[HeartBeating mechanism] +Configures the optional HeartBeating mechanism for faster failure detection. Keep in +mind that engaging this mechanism in lossy networks could cause faulty loss declaration +as the mechanism relies on the network for heartbeating. +

+So as a rule of thumb, use this mechanism if you require improved failure detection in low to +medium utilized networks. +

+This constant specifies the number of heartbeat failures the system should tolerate +before declaring heartbeat failure, e.g. 3. If this value is not set or is 0, then the +heartbeat mechanism is not engaged in the system and token rotation is the method +of failure detection. +

+The default is 0 (disabled). +

+

max_network_delay
+[HeartBeating mechanism] +This constant specifies in milliseconds the approximate delay that your network takes +to transport one packet from one machine to another. This value is to be set by system +engineers; please don't change it if unsure, as it affects the failure detection +mechanism using heartbeat. +

+The default is 50 milliseconds. +

+

window_size
+This constant specifies the maximum number of messages that may be sent on one +token rotation. If all processors perform equally well, this value could be +large (300), which would introduce higher latency from origination to delivery +for very large rings. To reduce latency in large rings (16+), the defaults are +a safe compromise. If 1 or more slow processor(s) are present among fast +processors, window_size should be no larger than 256000 / netmtu to avoid +overflow of the kernel receive buffers. The user is notified of this by +the display of a retransmit list in the notification logs. There is no loss +of data, but performance is reduced when these errors occur. +

+The default is 50 messages. +

+

max_messages
+This constant specifies the maximum number of messages that may be sent by one +processor on receipt of the token. The max_messages parameter is limited to +256000 / netmtu to prevent overflow of the kernel transmit buffers. +

+The default is 17 messages. +

+

rrp_problem_count_timeout
+This specifies the time in milliseconds to wait before decrementing the +problem count by 1 for a particular ring to ensure a link is not marked +faulty for transient network failures. +

+The default is 1000 milliseconds. +

+

rrp_problem_count_threshold
+This specifies the number of times a problem is detected with a link before +setting the link faulty. Once a link is set faulty, no more data is +transmitted upon it. Also, the problem counter is no longer decremented when +the problem count timeout expires. +

+A problem is detected whenever all tokens from the preceding processor have +not been received within the rrp_token_expired_timeout. The +rrp_problem_count_threshold * rrp_token_expired_timeout should be at least 50 +milliseconds less than the token timeout, or a complete reconfiguration +may occur. +

+The default is 20 problem counts. +

+

rrp_token_expired_timeout
+This specifies the time in milliseconds to increment the problem counter for +the redundant ring protocol after not having received a token from all rings +for a particular processor. +

+This value will automatically be calculated from the token timeout and +problem_count_threshold but may be overridden. It is not recommended to +override this value without guidance from the openais community. +

+The default is 47 milliseconds. +

+

+ + --- conga/luci/homebase/form-chooser 2006/10/09 16:16:11 1.10 +++ conga/luci/homebase/form-chooser 2006/11/16 19:34:53 1.10.2.1 @@ -4,10 +4,6 @@ The title - - $Id: form-chooser,v 1.10 2006/10/09 16:16:11 rmccabe Exp $ - - --- conga/luci/homebase/form-macros 2006/11/01 22:06:55 1.44.2.3 +++ conga/luci/homebase/form-macros 2006/11/16 19:34:53 1.44.2.4 @@ -1,9 +1,5 @@ - - $Id: form-macros,v 1.44.2.3 2006/11/01 22:06:55 rmccabe Exp $ - - --- conga/luci/homebase/homebase_common.js 2006/10/04 17:24:58 1.13 +++ conga/luci/homebase/homebase_common.js 2006/11/16 19:34:53 1.13.2.1 @@ -8,6 +8,35 @@ ielem.className = ielem.className.replace(/ formerror/, ''); } +function toggle_visible(img_obj, elem_id, label_id) { + var elem = document.getElementById(elem_id) + if (!elem) + return (-1); + + var old_state = !!!elem.className.match(/invisible/i); + + if (label_id) { + var label = document.getElementById(label_id); + if (!label) + return (-1); + if (old_state) + label.innerHTML = 'Show'; + else + label.innerHTML = 'Hide'; + } + + if (old_state) { + img_obj.src = 'arrow_right.png'; + img_obj.alt = '[-]'; + elem.className += ' invisible'; + } else { + img_obj.src = 'arrow_down.png'; + img_obj.alt = '[+]'; + elem.className = elem.className.replace(/invisible/i,''); + } + return (0); +} + function is_valid_int(str, min, max) { if (str.match(/[^0-9 -]/)) return (0); --- conga/luci/homebase/homebase_portlet_fetcher 2006/05/18 17:47:15 1.3 +++ conga/luci/homebase/homebase_portlet_fetcher 2006/11/16 19:34:53 1.3.2.1 @@ -3,10 +3,6 @@ - - $Id: homebase_portlet_fetcher,v 1.3 2006/05/18 17:47:15 rmccabe Exp $ - -
--- conga/luci/homebase/index_html 2006/10/31 17:28:04 1.18.2.1 +++ conga/luci/homebase/index_html 2006/11/16 19:34:53 1.18.2.2 @@ -14,10 +14,6 @@ tal:attributes="lang language; xml:lang language"> - - $Id: index_html,v 1.18.2.1 2006/10/31 17:28:04 rmccabe Exp $ - - --- conga/luci/homebase/luci_homebase.css 2006/10/16 19:13:45 1.28 +++ conga/luci/homebase/luci_homebase.css 2006/11/16 19:34:53 1.28.2.1 @@ -380,6 +380,20 @@ padding: .5em; } +a.cluster_help:link, +a.cluster_help:visited, +a.cluster_help:visited { + color: #000000; + text-decoration: none ! important; +} + +a.cluster_help:hover { + text-decoration: none ! important; + cursor: help; + color: #000000; + border-bottom: 1px solid #cccccc; +} + a.cluster:link, a.cluster:visited { border-bottom: 1px dashed #cccccc; --- conga/luci/homebase/portlet_homebase 2006/06/20 21:21:47 1.7 +++ conga/luci/homebase/portlet_homebase 2006/11/16 19:34:53 1.7.2.1 @@ -5,10 +5,6 @@ - - $Id: portlet_homebase,v 1.7 2006/06/20 21:21:47 rmccabe Exp $ - -
--- conga/luci/plone-custom/conga.js 2006/10/10 19:19:13 1.3 +++ conga/luci/plone-custom/conga.js 2006/11/16 19:34:53 1.3.2.1 @@ -8,5 +8,7 @@ function popup_window(url, width_percent, height_percent) { var width = window.innerWidth * (width_percent / 100); var height = window.innerHeight * (height_percent / 100); - window.open('luci/doc', '', 'width=' + width + ',height=' + height + ',scrollbars,resizable', false); + var newwin = window.open(url, 'Conga Help', 'width=' + width + ',height=' + height + ',scrollbars,resizable', false); + if (newwin) + newwin.focus(); } --- conga/luci/plone-custom/footer 2006/09/19 14:48:21 1.2 +++ conga/luci/plone-custom/footer 2006/11/16 19:34:53 1.2.2.1 @@ -6,7 +6,7 @@ The - + Conga Cluster and Storage Management System @@ -19,7 +19,7 @@ i18n:name="current_year" tal:define="now modules/DateTime/DateTime" tal:content="now/year" /> - by Red Hat, Inc + by Red Hat, Inc

--- conga/luci/site/luci/Extensions/FenceDaemon.py 2006/05/30 20:17:21 1.1 +++ conga/luci/site/luci/Extensions/FenceDaemon.py 2006/11/16 19:34:53 1.1.2.1 @@ -27,4 +27,10 @@ val = self.getAttribute("clean_start") return val + def setPostJoinDelay(self, delay): + self.addAttribute("post_join_delay", delay) + + def setPostFailDelay(self, delay): + self.addAttribute("post_fail_delay", delay) + --- conga/luci/site/luci/Extensions/FenceHandler.py 2006/10/16 19:58:38 1.4 +++ conga/luci/site/luci/Extensions/FenceHandler.py 2006/11/16 19:34:53 1.4.2.1 @@ -68,6 +68,8 @@ "fence_egenera":True, "fence_bullpap":True, "fence_drac":False, + "fence_xvm":True, + "fence_scsi":True, "fence_ipmilan":False, "fence_manual":False } --- conga/luci/site/luci/Extensions/LuciSyslog.py 2006/10/31 17:28:04 1.2.2.2 +++ conga/luci/site/luci/Extensions/LuciSyslog.py 2006/11/16 19:34:53 1.2.2.3 @@ -3,14 +3,12 @@ LOG_DAEMON, LOG_PID, LOG_NDELAY, LOG_INFO, \ LOG_WARNING, LOG_AUTH, LOG_DEBUG -"""Exception class for the LuciSyslog facility -""" +# Exception class for the LuciSyslog facility class LuciSyslogError(Exception): def __init__(self, msg): Exception.__init__(self, msg) -"""Facility that provides centralized syslog(3) functionality for luci -""" +# Facility that provides centralized syslog(3) functionality for luci class LuciSyslog: def __init__(self): self.__init = 0 @@ -26,7 +24,8 @@ try: syslog(LOG_INFO, msg) except: - raise LuciSyslogError, 'syslog info call failed' + pass + #raise LuciSyslogError, 'syslog info call failed' def warn(self, msg): if not self.__init: @@ -34,7 +33,8 @@ try: syslog(LOG_WARNING, msg) except: - raise LuciSyslogError, 'syslog warn call failed' + pass + #raise LuciSyslogError, 'syslog warn call failed' def private(self, msg): if not self.__init: @@ -42,15 +42,30 @@ try: syslog(LOG_AUTH, msg) except: - raise LuciSyslogError, 'syslog private call failed' + pass + #raise LuciSyslogError, 'syslog private call failed' def debug_verbose(self, msg): if not LUCI_DEBUG_MODE or LUCI_DEBUG_VERBOSITY < 2 or not self.__init: return - try: - syslog(LOG_DEBUG, msg) - except: - raise LuciSyslogError, 'syslog debug call failed' + + msg_len = len(msg) + if msg_len < 1: + return + + while True: + cur_len = min(msg_len, 800) + cur_msg = msg[:cur_len] + try: + syslog(LOG_DEBUG, cur_msg) + except: + pass + + msg_len -= cur_len + if msg_len > 0: + msg = msg[cur_len:] + else: + break def debug(self, msg): if not LUCI_DEBUG_MODE or not self.__init: @@ -58,7 +73,8 @@ try: syslog(LOG_DEBUG, msg) except: - raise LuciSyslogError, 'syslog debug call failed' + pass + #raise LuciSyslogError, 'syslog debug call failed' def close(self): try: --- conga/luci/site/luci/Extensions/cluster_adapters.py 2006/10/31 17:28:04 1.120.2.8 +++ conga/luci/site/luci/Extensions/cluster_adapters.py 2006/11/16 19:34:53 1.120.2.9 @@ -1,11 +1,10 @@ import socket from ModelBuilder import ModelBuilder from xml.dom import minidom -from ZPublisher import HTTPRequest import AccessControl from conga_constants import * from ricci_bridge import * -from ricci_communicator import * +from ricci_communicator import RicciCommunicator, RicciError, batch_status, extract_module_status from string import lower import time import Products.ManagedSystem @@ -20,10 +19,11 @@ from Script import Script from Samba import Samba from clusterOS import resolveOSType +from FenceHandler import FenceHandler, FENCE_OPTS from GeneralError import GeneralError from UnknownClusterError import UnknownClusterError from homebase_adapters import nodeUnauth, nodeAuth, manageCluster, 
createClusterSystems, havePermCreateCluster, setNodeFlag, delNodeFlag, userAuthenticated, getStorageNode, getClusterNode -from LuciSyslog import LuciSyslogError, LuciSyslog +from LuciSyslog import LuciSyslog #Policy for showing the cluster chooser menu: #1) If there are no clusters in the ManagedClusterSystems @@ -33,11 +33,9 @@ #then only display chooser if the current user has #permissions on at least one. If the user is admin, show ALL clusters -CLUSTER_FOLDER_PATH = '/luci/systems/cluster/' - try: luci_log = LuciSyslog() -except LuciSyslogError, e: +except: pass def validateClusterNodes(request, sessionData, clusterName, numStorage): @@ -114,7 +112,6 @@ def validateCreateCluster(self, request): errors = list() - messages = list() requestResults = {} if not havePermCreateCluster(self): @@ -188,7 +185,7 @@ batchNode = createClusterBatch(cluster_os, clusterName, clusterName, - map(lambda x: x['ricci_host'], nodeList), + map(lambda x: x['host'], nodeList), True, True, enable_storage, @@ -213,10 +210,10 @@ for i in nodeList: success = True try: - rc = RicciCommunicator(i['ricci_host']) + rc = RicciCommunicator(i['host']) except RicciError, e: luci_log.debug('Unable to connect to the ricci agent on %s: %s'\ - % (i['ricci_host'], str(e))) + % (i['host'], str(e))) success = False except: success = False @@ -224,39 +221,48 @@ if success == True: try: resultNode = rc.process_batch(batchNode, async=True) - batch_id_map[i['ricci_host']] = resultNode.getAttribute('batch_id') + batch_id_map[i['host']] = resultNode.getAttribute('batch_id') except: success = False if not success: nodeUnauth(nodeList) cluster_properties['isComplete'] = False - errors.append('An error occurred while attempting to add cluster node \"' + i['ricci_host'] + '\"') + errors.append('An error occurred while attempting to add cluster node \"' + i['host'] + '\"') return (False, {'errors': errors, 'requestResults':cluster_properties }) buildClusterCreateFlags(self, batch_id_map, clusterName) - messages.append('Creation of cluster \"' + clusterName + '\" has begun') - return (True, {'errors': errors, 'messages': messages }) + response = request.RESPONSE + response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clusterName + '&busyfirst=true') def buildClusterCreateFlags(self, batch_map, clusterName): - path = str(CLUSTER_FOLDER_PATH + clusterName) - clusterfolder = self.restrictedTraverse(path) - for key in batch_map.keys(): - key = str(key) - id = batch_map[key] - batch_id = str(id) - objname = str(key + "____flag") #This suffix needed to avoid name collision - clusterfolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname) - #now designate this new object properly - objpath = str(path + "/" + objname) - flag = self.restrictedTraverse(objpath) - #flag[BATCH_ID] = batch_id - #flag[TASKTYPE] = CLUSTER_ADD - #flag[FLAG_DESC] = "Creating node " + key + " for cluster " + clusterName - flag.manage_addProperty(BATCH_ID,batch_id, "string") - flag.manage_addProperty(TASKTYPE,CLUSTER_ADD, "string") - flag.manage_addProperty(FLAG_DESC,"Creating node " + key + " for cluster " + clusterName, "string") - flag.manage_addProperty(LAST_STATUS, 0, "int") + path = str(CLUSTER_FOLDER_PATH + clusterName) + + try: + clusterfolder = self.restrictedTraverse(path) + except Exception, e: + luci_log.debug_verbose('buildCCF0: no cluster folder at %s' % path) + return None + + for key in batch_map.keys(): + try: + key = str(key) + batch_id = str(batch_map[key]) + #This suffix needed to avoid name collision + objname = 
str(key + "____flag") + + clusterfolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname) + #now designate this new object properly + objpath = str(path + "/" + objname) + flag = self.restrictedTraverse(objpath) + + flag.manage_addProperty(BATCH_ID, batch_id, "string") + flag.manage_addProperty(TASKTYPE, CLUSTER_ADD, "string") + flag.manage_addProperty(FLAG_DESC, "Creating node " + key + " for cluster " + clusterName, "string") + flag.manage_addProperty(LAST_STATUS, 0, "int") + except Exception, e: + luci_log.debug_verbose('buildCCF1: error creating flag for %s: %s' \ + % (key, str(e))) def validateAddClusterNode(self, request): errors = list() @@ -264,7 +270,7 @@ requestResults = {} try: - sessionData = request.SESSION.get('checkRet') + sessionData = request.SESSION.get('checkRet') except: sessionData = None @@ -333,7 +339,8 @@ while i < len(nodeList): clunode = nodeList[i] try: - batchNode = addClusterNodeBatch(clusterName, + batchNode = addClusterNodeBatch(clunode['os'], + clusterName, True, True, enable_storage, @@ -346,7 +353,7 @@ clunode['errors'] = True nodeUnauth(nodeList) cluster_properties['isComplete'] = False - errors.append('Unable to initiate node creation for host \"' + clunode['ricci_host'] + '\"') + errors.append('Unable to initiate node creation for host \"' + clunode['host'] + '\"') if not cluster_properties['isComplete']: return (False, {'errors': errors, 'requestResults': cluster_properties}) @@ -363,28 +370,29 @@ clunode = nodeList[i] success = True try: - rc = RicciCommunicator(clunode['ricci_host']) - except: - luci_log.info('Unable to connect to the ricci daemon on host ' + clunode['ricci_host']) + rc = RicciCommunicator(clunode['host']) + except Exception, e: + luci_log.info('Unable to connect to the ricci daemon on host %s: %s'% (clunode['host'], str(e))) success = False if success: try: resultNode = rc.process_batch(batchNode, async=True) - batch_id_map[clunode['ricci_host']] = resultNode.getAttribute('batch_id') + batch_id_map[clunode['host']] = resultNode.getAttribute('batch_id') except: success = False if not success: nodeUnauth(nodeList) cluster_properties['isComplete'] = False - errors.append('An error occurred while attempting to add cluster node \"' + clunode['ricci_host'] + '\"') + errors.append('An error occurred while attempting to add cluster node \"' + clunode['host'] + '\"') return (False, {'errors': errors, 'requestResults': cluster_properties}) - messages.append('Cluster join initiated for host \"' + clunode['ricci_host'] + '\"') - + messages.append('Cluster join initiated for host \"' + clunode['host'] + '\"') buildClusterCreateFlags(self, batch_id_map, clusterName) - return (True, {'errors': errors, 'messages': messages}) + + response = request.RESPONSE + response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clusterName + '&busyfirst=true') def validateServiceAdd(self, request): try: @@ -420,16 +428,18 @@ form_hash[form_parent] = {'form': None, 'kids': []} form_hash[form_parent]['kids'].append(form_id) dummy_form = {} + for i in ielems: try: - type = str(i.getAttribute('type')) + input_type = str(i.getAttribute('type')) except: continue - if not type or type == 'button': + if not input_type or input_type == 'button': continue try: dummy_form[str(i.getAttribute('name'))] = str(i.getAttribute('value')) - except: + except Exception, e: + luci_log.debug_verbose('Error parsing service XML: %s' % str(e)) pass try: @@ -469,7 +479,7 @@ raise Exception, 'An error occurred while adding this resource' modelb = 
res[1] newres = res[0] - addResource(self, request, modelb, newres) + addResource(self, request, modelb, newres, res_type) except Exception, e: if len(errors) < 1: errors.append('An error occurred while adding this resource') @@ -480,35 +490,52 @@ ## Cluster properties form validation routines -def validateMCastConfig(self, form): +# rhel5 cluster version +def validateMCastConfig(model, form): + errors = list() try: mcast_val = form['mcast'].strip().lower() if mcast_val != 'true' and mcast_val != 'false': - raise KeyError(mcast_val) + raise KeyError, mcast_val if mcast_val == 'true': - mcast_val = 1 + mcast_manual = True else: - mcast_val = 0 + mcast_manual = False except KeyError, e: - return (False, {'errors': ['An invalid multicast selection was made.']}) + errors.append('An invalid multicast selection was made') + return (False, {'errors': errors}) - if not mcast_val: - return (True, {'messages': ['Changes accepted. - FILL ME IN']}) + if mcast_manual == True: + try: + addr_str = form['mcast_addr'].strip() + socket.inet_pton(socket.AF_INET, addr_str) + except KeyError, e: + errors.append('No multicast address was given') + except socket.error, e: + try: + socket.inet_pton(socket.AF_INET6, addr_str) + except socket.error, e: + errors.append('An invalid multicast address was given: %s') + else: + addr_str = None + + if (addr_str is None and mcast_manual != True) or (mcast_manual == True and addr_str == model.getMcastAddr()): + errors.append('No multicast configuration changes were made.') + return (False, {'errors': errors}) try: - addr_str = form['mcast_addr'].strip() - socket.inet_pton(socket.AF_INET, addr_str) - except KeyError, e: - return (False, {'errors': ['No multicast address was given']}) - except socket.error, e: - try: - socket.inet_pton(socket.AF_INET6, addr_str) - except socket.error, e6: - return (False, {'errors': ['An invalid multicast address was given: ' + e]}) + model.usesMulticast = True + model.mcast_address = addr_str + except Exception, e: + luci_log.debug('Error updating mcast properties: %s' % str(e)) + errors.append('Unable to update cluster multicast properties') - return (True, {'messages': ['Changes accepted. - FILL ME IN']}) + if len(errors) > 0: + return (False, {'errors': errors}) -def validateQDiskConfig(self, form): + return (True, {}) + +def validateQDiskConfig(model, form): errors = list() try: @@ -520,7 +547,7 @@ else: qdisk_val = 0 except KeyError, e: - return (False, {'errors': ['An invalid quorum partition selection was made.']}) + return (False, {'errors': ['An invalid quorum partition selection was made']}) if not qdisk_val: return (True, {'messages': ['Changes accepted. 
- FILL ME IN']}) @@ -528,64 +555,64 @@ try: interval = int(form['interval']) if interval < 0: - raise ValueError('Interval must be 0 or greater.') + raise ValueError, 'Interval must be 0 or greater' except KeyError, e: - errors.append('No Interval value was given.') + errors.append('No Interval value was given') except ValueError, e: - errors.append('An invalid Interval value was given: ' + e) + errors.append('An invalid Interval value was given: %s' % str(e)) try: votes = int(form['votes']) if votes < 1: - raise ValueError('Votes must be greater than 0') + raise ValueError, 'Votes must be greater than 0' except KeyError, e: - errors.append('No Votes value was given.') + errors.append('No Votes value was given') except ValueError, e: - errors.append('An invalid Votes value was given: ' + e) + errors.append('An invalid Votes value was given: %s' % str(e)) try: tko = int(form['tko']) if tko < 0: - raise ValueError('TKO must be 0 or greater') + raise ValueError, 'TKO must be 0 or greater' except KeyError, e: - errors.append('No TKO value was given.') + errors.append('No TKO value was given') except ValueError, e: - errors.append('An invalid TKO value was given: ' + e) + errors.append('An invalid TKO value was given: %s' % str(e)) try: min_score = int(form['min_score']) if min_score < 1: raise ValueError('Minimum Score must be greater than 0') except KeyError, e: - errors.append('No Minimum Score value was given.') + errors.append('No Minimum Score value was given') except ValueError, e: - errors.append('An invalid Minimum Score value was given: ' + e) + errors.append('An invalid Minimum Score value was given: %s' % str(e)) try: device = form['device'].strip() if not device: - raise KeyError('device') + raise KeyError, 'device is none' except KeyError, e: - errors.append('No Device value was given.') + errors.append('No Device value was given') try: label = form['label'].strip() if not label: - raise KeyError('label') + raise KeyError, 'label is none' except KeyError, e: - errors.append('No Label value was given.') + errors.append('No Label value was given') num_heuristics = 0 try: num_heuristics = int(form['num_heuristics']) if num_heuristics < 0: - raise ValueError(form['num_heuristics']) + raise ValueError, 'invalid number of heuristics: %s' % form['num_heuristics'] if num_heuristics == 0: num_heuristics = 1 except KeyError, e: errors.append('No number of heuristics was given.') except ValueError, e: - errors.append('An invalid number of heuristics was given: ' + e) + errors.append('An invalid number of heuristics was given: %s' % str(e)) heuristics = list() for i in xrange(num_heuristics): @@ -600,40 +627,49 @@ (not prefix + 'hscore' in form or not form['hscore'].strip())): # The row is blank; ignore it. 
continue - errors.append('No heuristic name was given for heuristic #' + str(i + 1)) + errors.append('No heuristic name was given for heuristic #%d' % i + 1) try: hpath = form[prefix + 'hpath'] except KeyError, e: - errors.append('No heuristic path was given for heuristic #' + str(i + 1)) + errors.append('No heuristic path was given for heuristic #%d' % i + 1) try: hint = int(form[prefix + 'hint']) if hint < 1: - raise ValueError('Heuristic interval values must be greater than 0.') + raise ValueError, 'Heuristic interval values must be greater than 0' except KeyError, e: - errors.append('No heuristic interval was given for heuristic #' + str(i + 1)) + errors.append('No heuristic interval was given for heuristic #%d' % i + 1) except ValueError, e: - errors.append('An invalid heuristic interval was given for heuristic #' + str(i + 1) + ': ' + e) + errors.append('An invalid heuristic interval was given for heuristic #%d: %s' % (i + 1, str(e))) try: hscore = int(form[prefix + 'score']) if hscore < 1: - raise ValueError('Heuristic scores must be greater than 0.') + raise ValueError, 'Heuristic scores must be greater than 0' except KeyError, e: - errors.append('No heuristic score was given for heuristic #' + str(i + 1)) + errors.append('No heuristic score was given for heuristic #%d' % i + 1) except ValueError, e: - errors.append('An invalid heuristic score was given for heuristic #' + str(i + 1) + ': ' + e) + errors.append('An invalid heuristic score was given for heuristic #%d: %s' % (i + 1, str(e))) heuristics.append([ hname, hpath, hint, hscore ]) if len(errors) > 0: return (False, {'errors': errors }) return (True, {'messages': ['Changes accepted. - FILL ME IN']}) -def validateGeneralConfig(self, form): +def validateGeneralConfig(model, form): errors = list() try: + cp = model.getClusterPtr() + old_name = model.getClusterAlias() + old_ver = int(cp.getConfigVersion()) + except Exception, e: + luci_log.debug_verbose('getConfigVersion: %s' % str(e)) + errors.append('unable to determine the current configuration version') + return (False, {'errors': errors}) + + try: cluster_name = form['cluname'].strip() if not cluster_name: raise KeyError('cluname') @@ -642,19 +678,29 @@ try: version_num = int(form['cfgver']) - if version_num < 0: - raise ValueError('configuration version numbers must be 0 or greater.') + if version_num < old_ver: + raise ValueError, 'configuration version number must be %d or greater.' % old_ver + # we'll increment the cluster version before propagating it. + version_num -= 1 except KeyError, e: errors.append('No cluster configuration version was given.') except ValueError, e: - errors.append('An invalid configuration version was given: ' + e) + errors.append('An invalid configuration version was given: %s' % str(e)) + + if len(errors) < 1: + try: + if cluster_name != old_name: + cp.addAttribute('alias', cluster_name) + cp.setConfigVersion(str(version_num)) + except Exception, e: + luci_log.debug_verbose('unable to update general properties: %s' % str(e)) + errors.append('Unable to update the cluster configuration.') if len(errors) > 0: return (False, {'errors': errors}) + return (True, {}) - return (True, {'messages': ['Changes accepted. 
- FILL ME IN']}) - -def validateFenceConfig(self, form): +def validateFenceConfig(model, form): errors = list() try: @@ -664,7 +710,7 @@ except KeyError, e: errors.append('No post fail delay was given.') except ValueError, e: - errors.append('Invalid post fail delay: ' + e) + errors.append('Invalid post fail delay: %s' % str(e)) try: post_join_delay = int(form['post_join_delay']) @@ -673,12 +719,26 @@ except KeyError, e: errors.append('No post join delay was given.') except ValueError, e: - errors.append('Invalid post join delay: ' + e) + errors.append('Invalid post join delay: %s' % str(e)) + + try: + fd = model.getFenceDaemonPtr() + old_pj_delay = fd.getPostJoinDelay() + old_pf_delay = fd.getPostFailDelay() + + if post_join_delay == old_pj_delay and post_fail_delay == old_pf_delay: + errors.append('No fence daemon properties were changed.') + else: + fd.setPostJoinDelay(str(post_join_delay)) + fd.setPostFailDelay(str(post_fail_delay)) + except Exception, e: + luci_log.debug_verbose('Unable to update fence daemon properties: %s' % str(e)) + errors.append('An error occurred while attempting to update fence daemon properties.') if len(errors) > 0: return (False, {'errors': errors }) - return (True, {'messages': ['Changes accepted. - FILL ME IN']}) + return (True, {}) configFormValidators = { 'general': validateGeneralConfig, @@ -690,27 +750,111 @@ def validateConfigCluster(self, request): errors = list() messages = list() + rc = None + + try: + model = request.SESSION.get('model') + if not model: + raise Exception, 'model is none' + except Exception, e: + model = None + try: + cluname = request.form['clustername'] + except: + try: + cluname = request['clustername'] + except: + luci_log.debug_verbose('VCC0a: no model, no cluster name') + return (False, {'errors': ['No cluster model was found.']}) - if not 'form' in request: - return (False, {'errors': ['No form was submitted.']}) - if not 'configtype' in request.form: + try: + model = getModelForCluster(self, cluname) + except: + model = None + + if model is None: + luci_log.debug_verbose('VCC0: unable to get model from session') + return (False, {'errors': ['No cluster model was found.']}) + try: + if not 'configtype' in request.form: + luci_log.debug_verbose('VCC2: no configtype') + raise Exception, 'no config type' + except Exception, e: + luci_log.debug_verbose('VCC2a: %s' % str(e)) return (False, {'errors': ['No configuration type was submitted.']}) + if not request.form['configtype'] in configFormValidators: + luci_log.debug_verbose('VCC3: invalid config type: %s' % request.form['configtype']) return (False, {'errors': ['An invalid configuration type was submitted.']}) - val = configFormValidators[request.form['configtype']] - ret = val(self, request.form) + try: + cp = model.getClusterPtr() + except: + luci_log.debug_verbose('VCC3a: getClusterPtr failed') + return (False, {'errors': ['No cluster model was found.']}) + + config_validator = configFormValidators[request.form['configtype']] + ret = config_validator(model, request.form) retcode = ret[0] if 'errors' in ret[1]: errors.extend(ret[1]['errors']) + if 'messages' in ret[1]: messages.extend(ret[1]['messages']) + if retcode == True: + try: + config_ver = int(cp.getConfigVersion()) + 1 + # always increment the configuration version + cp.setConfigVersion(str(config_ver)) + model.setModified(True) + conf_str = model.exportModelAsString() + if not conf_str: + raise Exception, 'conf_str is none' + except Exception, e: + luci_log.debug_verbose('VCC4: export model as string failed: %s' \ 
+ % str(e)) + errors.append('Unable to store the new cluster configuration') + + try: + clustername = model.getClusterName() + if not clustername: + raise Exception, 'cluster name from modelb.getClusterName() is blank' + except Exception, e: + luci_log.debug_verbose('VCC5: error: getClusterName: %s' % str(e)) + errors.append('Unable to determine cluster name from model') + + if len(errors) > 0: + return (retcode, {'errors': errors, 'messages': messages}) + + if not rc: + rc = getRicciAgent(self, clustername) + if not rc: + luci_log.debug_verbose('VCC6: unable to find a ricci agent for the %s cluster' % clustername) + errors.append('Unable to contact a ricci agent for cluster %s' \ + % clustername) + + if rc: + batch_id, result = setClusterConf(rc, str(conf_str)) + if batch_id is None or result is None: + luci_log.debug_verbose('VCC7: setCluserConf: batchid or result is None') + errors.append('Unable to propagate the new cluster configuration for %s' \ + % clustername) + else: + try: + set_node_flag(self, clustername, rc.hostname(), batch_id, + CLUSTER_CONFIG, 'Updating cluster configuration') + except: + pass + if len(errors) < 1: messages.append('The cluster properties have been updated.') + else: + return (retcode, {'errors': errors, 'messages': messages}) - return (retcode, {'errors': errors, 'messages': messages}) + response = request.RESPONSE + response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clustername + '&busyfirst=true') def validateFenceAdd(self, request): return (True, {}) @@ -718,6 +862,89 @@ def validateFenceEdit(self, request): return (True, {}) +def validateDaemonProperties(self, request): + errors = list() + + form = None + try: + response = request.response + form = request.form + if not form: + form = None + raise Exception, 'no form was submitted' + except: + pass + + if form is None: + luci_log.debug_verbose('VDP0: no form was submitted') + return (False, {'errors': ['No form was submitted']}) + + try: + nodename = form['nodename'].strip() + if not nodename: + raise Exception, 'nodename is blank' + except Exception, e: + errors.append('Unable to determine the current node name') + luci_log.debug_verbose('VDP1: no nodename: %s' % str(e)) + + try: + clustername = form['clustername'].strip() + if not clustername: + raise Exception, 'clustername is blank' + except Exception, e: + errors.append('Unable to determine the current cluster name') + luci_log.debug_verbose('VDP2: no clustername: %s' % str(e)) + + disable_list = list() + enable_list = list() + for i in form.items(): + try: + if i[0][:11] == '__daemon__:': + daemon_prop = i[1] + if len(daemon_prop) == 2: + if daemon_prop[1] == '1': + disable_list.append(daemon_prop[0]) + else: + if daemon_prop[1] == '0' and daemon_prop[2] == 'on': + enable_list.append(daemon_prop[0]) + except Exception, e: + luci_log.debug_verbose('VDP3: error: %s' % str(i)) + + if len(enable_list) < 1 and len(disable_list) < 1: + luci_log.debug_verbose('VDP4: no changes made') + response.redirect(request['URL'] + "?pagetype=" + NODE + "&clustername=" + clustername + '&nodename=' + nodename) + + nodename_resolved = resolve_nodename(self, clustername, nodename) + try: + rc = RicciCommunicator(nodename_resolved) + if not rc: + raise Exception, 'rc is None' + except Exception, e: + luci_log.debug_verbose('VDP5: RC %s: %s' % (nodename_resolved, str(e))) + errors.append('Unable to connect to the ricci agent on %s to update cluster daemon properties' % nodename_resolved) + return (False, {'errors': errors}) + + batch_id, 
result = updateServices(rc, enable_list, disable_list) + if batch_id is None or result is None: + luci_log.debug_verbose('VDP6: setCluserConf: batchid or result is None') + errors.append('Unable to update the cluster daemon properties on node %s' % nodename_resolved) + return (False, {'errors': errors}) + + try: + status_msg = 'Updating %s daemon properties:' % nodename_resolved + if len(enable_list) > 0: + status_msg += ' enabling %s' % str(enable_list)[1:-1] + if len(disable_list) > 0: + status_msg += ' disabling %s' % str(disable_list)[1:-1] + set_node_flag(self, clustername, rc.hostname(), batch_id, CLUSTER_DAEMON, status_msg) + except: + pass + + if len(errors) > 0: + return (False, {'errors': errors}) + + response.redirect(request['URL'] + "?pagetype=" + NODE + "&clustername=" + clustername + '&nodename=' + nodename + '&busyfirst=true') + formValidators = { 6: validateCreateCluster, 7: validateConfigCluster, @@ -728,11 +955,18 @@ 33: validateResourceAdd, 51: validateFenceAdd, 50: validateFenceEdit, + 55: validateDaemonProperties } def validatePost(self, request): - pagetype = int(request.form['pagetype']) + try: + pagetype = int(request.form['pagetype']) + except Exception, e: + luci_log.debug_verbose('VP0: error: %s' % str(e)) + return None + if not pagetype in formValidators: + luci_log.debug_verbose('VP1: no handler for page type %d' % pagetype) return None else: return formValidators[pagetype](self, request) @@ -748,21 +982,23 @@ except: request.SESSION.set('checkRet', {}) else: - try: request.SESSION.set('checkRet', {}) - except: pass + try: + request.SESSION.set('checkRet', {}) + except: + pass #First, see if a cluster is chosen, then #check that the current user can access that system cname = None try: cname = request[CLUNAME] - except KeyError, e: + except: cname = "" try: url = request['URL'] - except KeyError, e: - url = "." + except: + url = "/luci/cluster/index_html" try: pagetype = request[PAGETYPE] @@ -811,7 +1047,7 @@ clcfg['show_children'] = False #loop through all clusters - syslist= list() + syslist = list() for system in systems: clsys = {} clsys['Title'] = system[0] @@ -839,20 +1075,30 @@ return dummynode +def getnodes(self, model): + mb = model + nodes = mb.getNodes() + names = list() + for node in nodes: + names.append(node.getName()) + return names def createCluConfigTree(self, request, model): dummynode = {} + if not model: + return {} + #There should be a positive page type try: pagetype = request[PAGETYPE] - except KeyError, e: + except: pagetype = '3' try: url = request['URL'] except KeyError, e: - url = "." 
+ url = "/luci/cluster/index_html" #The only way this method can run is if there exists #a clustername query var @@ -1110,7 +1356,7 @@ kids.append(rvadd) kids.append(rvcfg) rv['children'] = kids - ################################################################# + ################################################################ fd = {} fd['Title'] = "Failover Domains" fd['cfg_type'] = "failoverdomains" @@ -1266,8 +1512,10 @@ return model.getClusterName() def getClusterAlias(self, model): + if not model: + return '' alias = model.getClusterAlias() - if alias == None: + if alias is None: return model.getClusterName() else: return alias @@ -1281,6 +1529,7 @@ portaltabs = list() if not userAuthenticated(self): return portaltabs + selectedtab = "homebase" try: baseurl = req['URL'] @@ -1291,12 +1540,7 @@ else: selectedtab = "homebase" except KeyError, e: - pass - - try: - base2 = req['BASE2'] - except KeyError, e: - base2 = req['HTTP_HOST'] + req['SERVER_PORT'] + selectedtab = None htab = { 'Title':"homebase", 'Description':"Home base for this luci server", @@ -1309,7 +1553,7 @@ ctab = { 'Title':"cluster", 'Description':"Cluster configuration page", - 'Taburl':"/luci/cluster?pagetype=3"} + 'Taburl':"/luci/cluster/index_html?pagetype=3"} if selectedtab == "cluster": ctab['isSelected'] = True else: @@ -1331,7 +1575,7 @@ -def check_clusters(self,clusters): +def check_clusters(self, clusters): clist = list() for cluster in clusters: if cluster_permission_check(cluster[1]): @@ -1357,15 +1601,15 @@ try: clusterfolder = self.restrictedTraverse(path) if not clusterfolder: - luci_log.debug('GRA: cluster folder %s for %s is missing.' \ + luci_log.debug('GRA0: cluster folder %s for %s is missing.' \ % (path, clustername)) raise Exception, 'no cluster folder at %s' % path nodes = clusterfolder.objectItems('Folder') if len(nodes) < 1: - luci_log.debug('GRA: no cluster nodes for %s found.' % clustername) + luci_log.debug('GRA1: no cluster nodes for %s found.' % clustername) raise Exception, 'no cluster nodes were found at %s' % path except Exception, e: - luci_log.debug('GRA: cluster folder %s for %s is missing: %s.' \ + luci_log.debug('GRA2: cluster folder %s for %s is missing: %s.' 
\ % (path, clustername, str(e))) return None @@ -1383,17 +1627,31 @@ try: rc = RicciCommunicator(hostname) except RicciError, e: - luci_log.debug('GRA: ricci error: %s' % str(e)) + luci_log.debug('GRA3: ricci error: %s' % str(e)) continue try: clu_info = rc.cluster_info() except Exception, e: - luci_log.debug('GRA: cluster_info error: %s' % str(e)) + luci_log.debug('GRA4: cluster_info error: %s' % str(e)) + + try: + cur_name = str(clu_info[0]).strip().lower() + if not cur_name: + raise + except: + cur_name = None - if cluname != lower(clu_info[0]) and cluname != lower(clu_info[1]): + try: + cur_alias = str(clu_info[1]).strip().lower() + if not cur_alias: + raise + except: + cur_alias = None + + if (cur_name is not None and cluname != cur_name) and (cur_alias is not None and cluname != cur_alias): try: - luci_log.debug('GRA: %s reports it\'s in cluster %s:%s; we expect %s' \ + luci_log.debug('GRA5: %s reports it\'s in cluster %s:%s; we expect %s' \ % (hostname, clu_info[0], clu_info[1], cluname)) setNodeFlag(self, node, CLUSTER_NODE_NOT_MEMBER) except: @@ -1407,29 +1665,43 @@ except: pass - luci_log.debug('GRA: no ricci agent could be found for cluster %s' % cluname) + luci_log.debug('GRA6: no ricci agent could be found for cluster %s' \ + % cluname) return None def getRicciAgentForCluster(self, req): + clustername = None try: clustername = req['clustername'] - except KeyError, e: + if not clustername: + clustername = None + raise + except: try: clustername = req.form['clusterName'] if not clustername: - raise + clustername = None except: - luci_log.debug('no cluster name was specified in getRicciAgentForCluster') - return None + pass + + if clustername is None: + luci_log.debug('GRAFC0: no cluster name was found') + return None return getRicciAgent(self, clustername) def getClusterStatus(self, rc): - doc = getClusterStatusBatch(rc) + try: + doc = getClusterStatusBatch(rc) + except Exception, e: + luci_log.debug_verbose('GCS0: error: %s' % str(e)) + doc = None + if not doc: try: - luci_log.debug_verbose('getClusterStatusBatch returned None for %s/%s' % rc.cluster_info()) + luci_log.debug_verbose('GCS1: returned None for %s/%s' % rc.cluster_info()) except: pass + return {} results = list() @@ -1477,18 +1749,18 @@ baseurl = req['URL'] if not baseurl: raise KeyError, 'is blank' - except KeyError, e: - baseurl = '.' + except: + baseurl = '/luci/cluster/index_html' try: cluname = req['clustername'] if not cluname: raise KeyError, 'is blank' - except KeyError, e: + except: try: cluname = req.form['clusterName'] if not cluname: - raise + raise KeyError, 'is blank' except: cluname = '[error retrieving cluster name]' @@ -1504,7 +1776,7 @@ svc = modelb.retrieveServiceByName(item['name']) dom = svc.getAttribute("domain") - if dom != None: + if dom is not None: itemmap['faildom'] = dom else: itemmap['faildom'] = "No Failover Domain" @@ -1522,8 +1794,8 @@ baseurl = req['URL'] if not baseurl: raise KeyError, 'is blank' - except KeyError, e: - baseurl = '.' 
+ except: + baseurl = '/luci/cluster/index_html' try: cluname = req['clustername'] @@ -1588,7 +1860,7 @@ #first get service by name from model svc = modelb.getService(servicename) resource_list = list() - if svc != None: + if svc is not None: indent_ctr = 0 children = svc.getChildren() for child in children: @@ -1603,7 +1875,7 @@ #Call yourself on every children #then return rc_map = {} - if parent != None: + if parent is not None: rc_map['parent'] = parent rc_map['name'] = child.getName() if child.isRefObject() == True: @@ -1631,22 +1903,27 @@ return child_depth + 1 def serviceStart(self, rc, req): + svcname = None try: svcname = req['servicename'] - except KeyError, e: + except: try: svcname = req.form['servicename'] except: - luci_log.debug_verbose('serviceStart error: no service name') - return None + pass + + if svcname is None: + luci_log.debug_verbose('serviceStart0: no service name') + return None + nodename = None try: nodename = req['nodename'] - except KeyError, e: + except: try: nodename = req.form['nodename'] except: - nodename = None + pass cluname = None try: @@ -1658,52 +1935,38 @@ pass if cluname is None: - luci_log.debug_verbose('serviceStart error: %s no service name' \ + luci_log.debug_verbose('serviceStart2: no cluster name for svc %s' \ % svcname) return None - ricci_agent = rc.hostname() - batch_number, result = startService(rc, svcname, nodename) if batch_number is None or result is None: - luci_log.debug_verbose('startService %s call failed' \ - % svcname) + luci_log.debug_verbose('startService3: SS(%s,%s,%s) call failed' \ + % (svcname, cluname, nodename)) return None - #Now we need to create a DB flag for this system. - path = str(CLUSTER_FOLDER_PATH + cluname) - batch_id = str(batch_number) - objname = str(ricci_agent + "____flag") - try: - clusterfolder = self.restrictedTraverse(path) - clusterfolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname) - #Now we need to annotate the new DB object - objpath = str(path + "/" + objname) - flag = self.restrictedTraverse(objpath) - flag.manage_addProperty(BATCH_ID, batch_id, "string") - flag.manage_addProperty(TASKTYPE, SERVICE_START, "string") - flag.manage_addProperty(FLAG_DESC, "Starting service \'" + svcname + "\'", "string") + set_node_flag(self, cluname, rc.hostname(), str(batch_number), SERVICE_START, "Starting service \'%s\'" % svcname) except Exception, e: - luci_log.debug_verbose('Error creating flag at %s: %s' % (objpath, str(e))) + luci_log.debug_verbose('startService4: error setting flags for service %s@node %s for cluster %s' % (svcname, nodename, cluname)) response = req.RESPONSE - response.redirect(req['HTTP_REFERER'] + "&busyfirst=true") + response.redirect(req['URL'] + "?pagetype=" + SERVICE_LIST + "&clustername=" + cluname + '&busyfirst=true') def serviceRestart(self, rc, req): + svcname = None try: svcname = req['servicename'] - except KeyError, e: + except: try: svcname = req.form['servicename'] except: - luci_log.debug_verbose('no service name for serviceRestart') - return None - except: - luci_log.debug_verbose('no service name for serviceRestart') + pass + + if svcname is None: + luci_log.debug_verbose('serviceRestart0: no service name') return None - #Now we need to create a DB flag for this system. 
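serviceStart above replaces the inline ManagedSystem flag bookkeeping with a single set_node_flag() call. set_node_flag itself is not shown in this hunk; the following is only a hypothetical reconstruction of what it consolidates, pieced together from the removed blocks (which created a '<host>____flag' object under the cluster folder and annotated it with BATCH_ID, TASKTYPE and FLAG_DESC).

    def set_node_flag_sketch(self, clustername, agent, batch_id, task, desc):
        # Hypothetical reconstruction, not the shipped helper: create the
        # flag object under the cluster folder and annotate it, exactly as
        # the removed inline code did.
        path = str(CLUSTER_FOLDER_PATH + clustername)
        objname = str(agent + '____flag')
        objpath = str(path + '/' + objname)
        clusterfolder = self.restrictedTraverse(path)
        clusterfolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname)
        flag = self.restrictedTraverse(objpath)
        flag.manage_addProperty(BATCH_ID, batch_id, 'string')
        flag.manage_addProperty(TASKTYPE, task, 'string')
        flag.manage_addProperty(FLAG_DESC, desc, 'string')
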
cluname = None try: cluname = req['clustername'] @@ -1714,51 +1977,36 @@ pass if cluname is None: - luci_log.debug_verbose('unable to determine cluser name for serviceRestart %s' % svcname) + luci_log.debug_verbose('serviceRestart1: no cluster for %s' % svcname) return None batch_number, result = restartService(rc, svcname) if batch_number is None or result is None: - luci_log.debug_verbose('restartService for %s failed' % svcname) + luci_log.debug_verbose('serviceRestart2: %s failed' % svcname) return None - ricci_agent = rc.hostname() - - path = str(CLUSTER_FOLDER_PATH + cluname) - batch_id = str(batch_number) - objname = str(ricci_agent + "____flag") - try: - clusterfolder = self.restrictedTraverse(path) - clusterfolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname) - - #Now we need to annotate the new DB object - objpath = str(path + "/" + objname) - flag = self.restrictedTraverse(objpath) - flag.manage_addProperty(BATCH_ID, batch_id, "string") - flag.manage_addProperty(TASKTYPE, SERVICE_RESTART, "string") - flag.manage_addProperty(FLAG_DESC, "Restarting service " + svcname, "string") + set_node_flag(self, cluname, rc.hostname(), str(batch_number), SERVICE_RESTART, "Restarting service \'%s\'" % svcname) except Exception, e: - luci_log.debug_verbose('Error creating flag in restartService %s: %s' \ - % (svcname, str(e))) + luci_log.debug_verbose('serviceRestart3: error setting flags for service %s for cluster %s' % (svcname, cluname)) response = req.RESPONSE - response.redirect(req['HTTP_REFERER'] + "&busyfirst=true") + response.redirect(req['URL'] + "?pagetype=" + SERVICE_LIST + "&clustername=" + cluname + '&busyfirst=true') def serviceStop(self, rc, req): + svcname = None try: svcname = req['servicename'] - except KeyError, e: + except: try: svcname = req.form['servicename'] except: - luci_log.debug_verbose('no service name for serviceStop') - return None - except: - luci_log.debug_verbose('no service name for serviceStop') + pass + + if svcname is None: + luci_log.debug_verbose('serviceStop0: no service name') return None - #Now we need to create a DB flag for this system. 
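The service handlers above now redirect back to the service list through an explicit URL (request['URL'] plus pagetype, clustername and busyfirst=true) rather than HTTP_REFERER. A small illustrative equivalent using urllib.urlencode is shown here; the real SERVICE_LIST value is defined in conga_constants, so the pagetype passed in below is only a placeholder.

    import urllib

    def service_list_redirect_url(base_url, service_list_pagetype, cluname):
        # Same query the handlers build by concatenation, assembled with
        # urlencode so the values are escaped (parameter order may differ).
        query = urllib.urlencode({
            'pagetype': service_list_pagetype,
            'clustername': cluname,
            'busyfirst': 'true',
        })
        return '%s?%s' % (base_url, query)

    # e.g. service_list_redirect_url('/luci/cluster/index_html', '122', 'mycluster')
    # (the pagetype shown is a placeholder, not the real SERVICE_LIST constant)
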
cluname = None try: cluname = req['clustername'] @@ -1769,37 +2017,21 @@ pass if cluname is None: - luci_log.debug_verbose('unable to determine cluser name for serviceStop %s' % svcname) + luci_log.debug_verbose('serviceStop1: no cluster name for %s' % svcname) return None batch_number, result = stopService(rc, svcname) if batch_number is None or result is None: - luci_log.debug_verbose('stopService for %s failed' % svcname) + luci_log.debug_verbose('serviceStop2: stop %s failed' % svcname) return None - ricci_agent = rc.hostname() - - path = str(CLUSTER_FOLDER_PATH + cluname) - batch_id = str(batch_number) - objname = str(ricci_agent + "____flag") - try: - clusterfolder = self.restrictedTraverse(path) - clusterfolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname) - #Now we need to annotate the new DB object - objpath = str(path + "/" + objname) - flag = self.restrictedTraverse(objpath) - - flag.manage_addProperty(BATCH_ID, batch_id, "string") - flag.manage_addProperty(TASKTYPE, SERVICE_STOP, "string") - flag.manage_addProperty(FLAG_DESC, "Stopping service " + svcname, "string") - time.sleep(2) + set_node_flag(self, cluname, rc.hostname(), str(batch_number), SERVICE_STOP, "Stopping service \'%s\'" % svcname) except Exception, e: - luci_log.debug_verbose('Error creating flags for stopService %s: %s' \ - % (svcname, str(e))) + luci_log.debug_verbose('serviceStop3: error setting flags for service %s for cluster %s' % (svcname, cluname)) response = req.RESPONSE - response.redirect(req['HTTP_REFERER'] + "&busyfirst=true") + response.redirect(req['URL'] + "?pagetype=" + SERVICE_LIST + "&clustername=" + cluname + '&busyfirst=true') def getFdomsInfo(self, modelb, request, clustatus): slist = list() @@ -1820,11 +2052,11 @@ fdom_map['cfgurl'] = baseurl + "?pagetype=" + FDOM_LIST + "&clustername=" + clustername ordered_attr = fdom.getAttribute('ordered') restricted_attr = fdom.getAttribute('restricted') - if ordered_attr != None and (ordered_attr == "true" or ordered_attr == "1"): + if ordered_attr is not None and (ordered_attr == "true" or ordered_attr == "1"): fdom_map['ordered'] = True else: fdom_map['ordered'] = False - if restricted_attr != None and (restricted_attr == "true" or restricted_attr == "1"): + if restricted_attr is not None and (restricted_attr == "true" or restricted_attr == "1"): fdom_map['restricted'] = True else: fdom_map['restricted'] = False @@ -1845,7 +2077,7 @@ else: nodesmap['status'] = NODE_INACTIVE priority_attr = node.getAttribute('priority') - if priority_attr != None: + if priority_attr is not None: nodesmap['priority'] = "0" nodelist.append(nodesmap) fdom_map['nodeslist'] = nodelist @@ -1858,7 +2090,7 @@ break #found more info about service... 
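getFdomsInfo above reads the failover domain's 'ordered' and 'restricted' attributes as strings and treats "true" or "1" as set. The same test as a tiny standalone helper, for illustration only:

    def attr_is_set(value):
        # cluster.conf attributes come back as strings (or None when absent);
        # only the literal strings "true" and "1" count as enabled.
        return value is not None and value in ('true', '1')

    assert attr_is_set('true') and attr_is_set('1')
    assert not attr_is_set(None) and not attr_is_set('false')
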
domain = svc.getAttribute("domain") - if domain != None: + if domain is not None: if domain == fdom.getName(): svcmap = {} svcmap['name'] = svcname @@ -1870,54 +2102,87 @@ fdomlist.append(fdom_map) return fdomlist -def processClusterProps(self, ricci_agent, request): - #First, retrieve cluster.conf from session - conf = request.SESSION.get('conf') - model = ModelBuilder(0, None, None, conf) - - #Next, determine actiontype and switch on it - actiontype = request[ACTIONTYPE] - - if actiontype == BASECLUSTER: - cp = model.getClusterPtr() - cfgver = cp.getConfigVersion() - - rcfgver = request['cfgver'] - - if cfgver != rcfgver: - cint = int(cfgver) - rint = int(rcfgver) - if rint > cint: - cp.setConfigVersion(rcfgver) - - rname = request['cluname'] - name = model.getClusterAlias() +def clusterTaskProcess(self, model, request): + try: + task = request['task'] + except: + try: + task = request.form['task'] + except: + luci_log.debug_verbose('CTP1: no task specified') + task = None - if rname != name: - cp.addAttribute('alias', rname) + if not model: + try: + cluname = request['clustername'] + if not cluname: + raise Exception, 'cluname is blank' + except: + try: + cluname = request.form['clustername'] + if not cluname: + raise Exception, 'cluname is blank' + except: + luci_log.debug_verbose('CTP0: no model/no cluster name') + return 'Unable to determine the cluster name.' + try: + model = getModelForCluster(self, cluname) + except Exception, e: + luci_log.debug_verbose('CPT1: GMFC failed for %s' % cluname) + model = None - response = request.RESPONSE - response.redirect(request['HTTP_REFERER'] + "&busyfirst=true") - return + if not model: + return 'Unable to get the model object for %s' % cluname - elif actiontype == FENCEDAEMON: - pass + if task == CLUSTER_STOP: + clusterStop(self, model) + elif task == CLUSTER_START: + clusterStart(self, model) + elif task == CLUSTER_RESTART: + clusterRestart(self, model) + elif task == CLUSTER_DELETE: + clusterStop(self, model, delete=True) + else: + return 'An unknown cluster task was requested.' - elif actiontype == MULTICAST: - pass + response = request.RESPONSE + response.redirect('%s?pagetype=%s&clustername=%s&busyfirst=true' \ + % (request['URL'], NODES, model.getClusterName())) - elif actiontype == QUORUMD: - pass +def getClusterInfo(self, model, req): + try: + cluname = req[CLUNAME] + except: + try: + cluname = req.form['clustername'] + except: + try: + cluname = req.form['clusterName'] + except: + luci_log.debug_verbose('GCI0: unable to determine cluster name') + return {} - else: - return + if model is None: + rc = getRicciAgent(self, cluname) + if not rc: + luci_log.debug_verbose('GCI1: unable to find a ricci agent for the %s cluster' % cluname) + return {} + try: + model = getModelBuilder(None, rc, rc.dom0()) + if not model: + raise Exception, 'model is none' + try: + req.SESSION.set('model', model) + except Exception, e2: + luci_log.debug_verbose('GCI2 unable to set model in session: %s' % str(e2)) + except Exception, e: + luci_log.debug_verbose('GCI3: unable to get model for cluster %s: %s' % (cluname, str(e))) + return {} -def getClusterInfo(self, model, req): - cluname = req[CLUNAME] - baseurl = req['URL'] + "?" + PAGETYPE + "=" + CLUSTER_PROCESS + "&" + CLUNAME + "=" + cluname + "&" + prop_baseurl = req['URL'] + '?' 
+ PAGETYPE + '=' + CLUSTER_CONFIG + '&' + CLUNAME + '=' + cluname + '&' map = {} - basecluster_url = baseurl + ACTIONTYPE + "=" + BASECLUSTER + basecluster_url = prop_baseurl + PROPERTIES_TAB + "=" + PROP_GENERAL_TAB #needed: map['basecluster_url'] = basecluster_url #name field @@ -1929,14 +2194,14 @@ #new cluster params - if rhel5 #------------- #Fence Daemon Props - fencedaemon_url = baseurl + ACTIONTYPE + "=" + FENCEDAEMON + fencedaemon_url = prop_baseurl + PROPERTIES_TAB + "=" + PROP_FENCE_TAB map['fencedaemon_url'] = fencedaemon_url fdp = model.getFenceDaemonPtr() pjd = fdp.getAttribute('post_join_delay') - if pjd == None: + if pjd is None: pjd = "6" pfd = fdp.getAttribute('post_fail_delay') - if pfd == None: + if pfd is None: pfd = "0" #post join delay map['pjd'] = pjd @@ -1944,7 +2209,7 @@ map['pfd'] = pfd #------------- #if multicast - multicast_url = baseurl + ACTIONTYPE + "=" + MULTICAST + multicast_url = prop_baseurl + PROPERTIES_TAB + "=" + PROP_MCAST_TAB map['multicast_url'] = multicast_url #mcast addr is_mcast = model.isMulticast() @@ -1958,7 +2223,7 @@ #------------- #quorum disk params - quorumd_url = baseurl + ACTIONTYPE + "=" + QUORUMD + quorumd_url = prop_baseurl + PROPERTIES_TAB + "=" + PROP_QDISK_TAB map['quorumd_url'] = quorumd_url is_quorumd = model.isQuorumd() map['is_quorumd'] = is_quorumd @@ -1975,27 +2240,27 @@ if is_quorumd: qdp = model.getQuorumdPtr() interval = qdp.getAttribute('interval') - if interval != None: + if interval is not None: map['interval'] = interval tko = qdp.getAttribute('tko') - if tko != None: + if tko is not None: map['tko'] = tko votes = qdp.getAttribute('votes') - if votes != None: + if votes is not None: map['votes'] = votes min_score = qdp.getAttribute('min_score') - if min_score != None: + if min_score is not None: map['min_score'] = min_score device = qdp.getAttribute('device') - if device != None: + if device is not None: map['device'] = device label = qdp.getAttribute('label') - if label != None: + if label is not None: map['label'] = label heuristic_kids = qdp.getChildren() @@ -2003,24 +2268,24 @@ for kid in heuristic_kids: hmap = {} hname = kid.getAttribute('name') - if hname == None: + if hname is None: hname = h_ctr h_ctr = h_ctr + 1 hprog = kid.getAttribute('program') hscore = kid.getAttribute('score') hinterval = kid.getAttribute('interval') - if hprog == None: + if hprog is None: continue - if hname != None: + if hname is not None: hmap['hname'] = hname else: hmap['hname'] = "" hmap['hprog'] = hprog - if hscore != None: + if hscore is not None: hmap['hscore'] = hscore else: hmap['hscore'] = "" - if hinterval != None: + if hinterval is not None: hmap['hinterval'] = hinterval else: hmap['hinterval'] = "" @@ -2029,7 +2294,7 @@ return map -def getClustersInfo(self,status,req): +def getClustersInfo(self, status, req): map = {} nodelist = list() svclist = list() @@ -2062,6 +2327,12 @@ map['votes'] = clu['votes'] map['minquorum'] = clu['minQuorum'] map['clucfg'] = baseurl + "?" + PAGETYPE + "=" + CLUSTER_CONFIG + "&" + CLUNAME + "=" + clustername + + map['restart_url'] = baseurl + "?" + PAGETYPE + "=" + CLUSTER_PROCESS + "&" + CLUNAME + "=" + clustername + '&task=' + CLUSTER_RESTART + map['stop_url'] = baseurl + "?" + PAGETYPE + "=" + CLUSTER_PROCESS + "&" + CLUNAME + "=" + clustername + '&task=' + CLUSTER_STOP + map['start_url'] = baseurl + "?" + PAGETYPE + "=" + CLUSTER_PROCESS + "&" + CLUNAME + "=" + clustername + '&task=' + CLUSTER_START + map['delete_url'] = baseurl + "?" 
+ PAGETYPE + "=" + CLUSTER_PROCESS + "&" + CLUNAME + "=" + clustername + '&task=' + CLUSTER_DELETE + svc_dict_list = list() for svc in svclist: svc_dict = {} @@ -2093,6 +2364,317 @@ return map +def nodeLeave(self, rc, clustername, nodename_resolved): + path = str(CLUSTER_FOLDER_PATH + clustername + '/' + nodename_resolved) + + try: + nodefolder = self.restrictedTraverse(path) + if not nodefolder: + raise Exception, 'cannot find database object at %s' % path + except Exception, e: + luci_log.debug('NLO: node_leave_cluster err: %s' % str(e)) + return None + + objname = str(nodename_resolved + "____flag") + fnpresent = noNodeFlagsPresent(self, nodefolder, objname, nodename_resolved) + + if fnpresent is None: + luci_log.debug('NL1: An error occurred while checking flags for %s' \ + % nodename_resolved) + return None + + if fnpresent == False: + luci_log.debug('NL2: flags are still present for %s -- bailing out' \ + % nodename_resolved) + return None + + batch_number, result = nodeLeaveCluster(rc) + if batch_number is None or result is None: + luci_log.debug_verbose('NL3: nodeLeaveCluster error: batch_number and/or result is None') + return None + + try: + set_node_flag(self, clustername, rc.hostname(), str(batch_number), NODE_LEAVE_CLUSTER, "Node \'%s\' leaving cluster" % nodename_resolved) + except Exception, e: + luci_log.debug_verbose('NL4: failed to set flags: %s' % str(e)) + return True + +def nodeJoin(self, rc, clustername, nodename_resolved): + batch_number, result = nodeJoinCluster(rc) + if batch_number is None or result is None: + luci_log.debug_verbose('NJ0: batch_number and/or result is None') + return None + + try: + set_node_flag(self, clustername, rc.hostname(), str(batch_number), NODE_JOIN_CLUSTER, "Node \'%s\' joining cluster" % nodename_resolved) + except Exception, e: + luci_log.debug_verbose('NJ1: failed to set flags: %s' % str(e)) + return True + +def clusterStart(self, model): + if model is None: + return None + + clustername = model.getClusterName() + nodes = model.getNodes() + if not nodes or len(nodes) < 1: + return None + + errors = 0 + for node in nodes: + nodename = node.getName().strip() + nodename_resolved = resolve_nodename(self, clustername, nodename) + + try: + rc = RicciCommunicator(nodename_resolved) + except Exception, e: + luci_log.debug_verbose('CStart: RC %s: %s' \ + % (nodename_resolved, str(e))) + errors += 1 + continue + if nodeJoin(self, rc, clustername, nodename_resolved) is None: + luci_log.debug_verbose('CStart1: nodeLeave %s' % nodename_resolved) + errors += 1 + + return errors + +def clusterStop(self, model, delete=False): + if model is None: + return None + + clustername = model.getClusterName() + nodes = model.getNodes() + if not nodes or len(nodes) < 1: + return None + + errors = 0 + for node in nodes: + nodename = node.getName().strip() + nodename_resolved = resolve_nodename(self, clustername, nodename) + + try: + rc = RicciCommunicator(nodename_resolved) + except Exception, e: + luci_log.debug_verbose('[%d] CStop0: RC %s: %s' \ + % (delete, nodename_resolved, str(e))) + errors += 1 + continue + if nodeLeave(self, rc, clustername, nodename_resolved) is None: + luci_log.debug_verbose('[%d] CStop1: nodeLeave %s' \ + % (delete, nodename_resolved)) + errors += 1 + return errors + +def clusterRestart(self, model): + snum_err = clusterStop(self, model) + if snum_err: + luci_log.debug_verbose('cluRestart0: clusterStop: %d errs' % snum_err) + jnum_err = clusterStart(self, model) + if jnum_err: + luci_log.debug_verbose('cluRestart0: clusterStart: %d 
errs' % jnum_err) + return snum_err + jnum_err + +def clusterDelete(self, model): + return clusterStop(self, model, delete=True) + +def forceNodeReboot(self, rc, clustername, nodename_resolved): + batch_number, result = nodeReboot(rc) + if batch_number is None or result is None: + luci_log.debug_verbose('FNR0: batch_number and/or result is None') + return None + + try: + set_node_flag(self, clustername, rc.hostname(), str(batch_number), NODE_REBOOT, "Node \'%s\' is being rebooted" % nodename_resolved) + except Exception, e: + luci_log.debug_verbose('FNR1: failed to set flags: %s' % str(e)) + return True + +def forceNodeFence(self, clustername, nodename, nodename_resolved): + path = str(CLUSTER_FOLDER_PATH + clustername) + + try: + clusterfolder = self.restrictedTraverse(path) + if not clusterfolder: + raise Exception, 'no cluster folder at %s' % path + except Exception, e: + luci_log.debug('FNF0: The cluster folder %s could not be found: %s' \ + % (clustername, str(e))) + return None + + try: + nodes = clusterfolder.objectItems('Folder') + if not nodes or len(nodes) < 1: + raise Exception, 'no cluster nodes' + except Exception, e: + luci_log.debug('FNF1: No cluster nodes for %s were found: %s' \ + % (clustername, str(e))) + return None + + found_one = False + for node in nodes: + if node[1].getId().find(nodename) != (-1): + continue + + try: + rc = RicciCommunicator(node[1].getId()) + if not rc: + raise Exception, 'rc is None' + except Exception, e: + luci_log.debug('FNF2: ricci error for host %s: %s' \ + % (node[0], str(e))) + continue + + if not rc.authed(): + rc = None + try: + snode = getStorageNode(self, node[1].getId()) + setNodeFlag(snode, CLUSTER_NODE_NEED_AUTH) + except: + pass + + try: + setNodeFlag(node[1], CLUSTER_NODE_NEED_AUTH) + except: + pass + + continue + found_one = True + break + + if not found_one: + return None + + batch_number, result = nodeFence(rc, nodename) + if batch_number is None or result is None: + luci_log.debug_verbose('FNF3: batch_number and/or result is None') + return None + + try: + set_node_flag(self, clustername, rc.hostname(), str(batch_number), NODE_FENCE, "Node \'%s\' is being fenced" % nodename_resolved) + except Exception, e: + luci_log.debug_verbose('FNF4: failed to set flags: %s' % str(e)) + return True + +def nodeDelete(self, rc, model, clustername, nodename, nodename_resolved): + #We need to get a node name other than the node + #to be deleted, then delete the node from the cluster.conf + #and propogate it. We will need two ricci agents for this task. + + # Make sure we can find a second node before we hose anything. + path = str(CLUSTER_FOLDER_PATH + clustername) + try: + clusterfolder = self.restrictedTraverse(path) + if not clusterfolder: + raise Exception, 'no cluster folder at %s' % path + except Exception, e: + luci_log.debug_verbose('ND0: node delete error for cluster %s: %s' \ + % (clustername, str(e))) + return None + + try: + nodes = clusterfolder.objectItems('Folder') + if not nodes or len(nodes) < 1: + raise Exception, 'no cluster nodes in DB' + except Exception, e: + luci_log.debug_verbose('ND1: node delete error for cluster %s: %s' \ + % (clustername, str(e))) + + found_one = False + for node in nodes: + if node[1].getId().find(nodename) != (-1): + continue + #here we make certain the node is up... + # XXX- we should also make certain this host is still + # in the cluster we believe it is. 
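forceNodeFence and nodeDelete both need a ricci agent on some node other than the one being acted on. The selection loop they share, reduced to an illustrative helper; connect is a stand-in for RicciCommunicator, and the candidate hosts would really come from the cluster folder in the database:

    def pick_other_agent(candidate_hosts, target_nodename, connect):
        # Skip the target node itself and any host whose agent cannot be
        # reached or is not authenticated; return (hostname, agent) or None.
        for host in candidate_hosts:
            if host.find(target_nodename) != (-1):
                continue
            try:
                agent = connect(host)
            except Exception:
                continue
            if not agent.authed():
                continue
            return (host, agent)
        return None
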
+ try: + rc2 = RicciCommunicator(node[1].getId()) + except Exception, e: + luci_log.info('ND2: ricci %s error: %s' % (node[0], str(e))) + continue + + if not rc2.authed(): + try: + setNodeFlag(node[1], CLUSTER_NODE_NEED_AUTH) + except: + pass + + try: + snode = getStorageNode(self, node[0]) + setNodeFlag(snode, CLUSTER_NODE_NEED_AUTH) + except: + pass + + luci_log.debug_verbose('ND3: %s is not authed' % node[0]) + rc2 = None + continue + else: + found_one = True + break + + if not found_one: + luci_log.debug_verbose('ND4: unable to find ricci agent to delete %s from %s' % (nodename, clustername)) + return None + + #First, delete cluster.conf from node to be deleted. + #next, have node leave cluster. + batch_number, result = nodeLeaveCluster(rc, purge=True) + if batch_number is None or result is None: + luci_log.debug_verbose('ND5: batch_number and/or result is None') + return None + + #It is not worth flagging this node in DB, as we are going + #to delete it anyway. Now, we need to delete node from model + #and send out new cluster.conf + delete_target = None + nodelist = model.getNodes() + find_node = lower(nodename) + for n in nodelist: + try: + if lower(n.getName()) == find_node: + delete_target = n + break + except: + continue + + if delete_target is None: + luci_log.debug_verbose('ND6: unable to find delete target for %s in %s' \ + % (nodename, clustername)) + return None + + model.deleteNode(delete_target) + + try: + str_buf = model.exportModelAsString() + if not str_buf: + raise Exception, 'model string is blank' + except Exception, e: + luci_log.debug_verbose('ND7: exportModelAsString: %s' % str(e)) + return None + + # propagate the new cluster.conf via the second node + batch_number, result = setClusterConf(rc2, str(str_buf)) + if batch_number is None: + luci_log.debug_verbose('ND8: batch number is None after del node in NTP') + return None + + #Now we need to delete the node from the DB + path = str(CLUSTER_FOLDER_PATH + clustername) + del_path = str(path + '/' + nodename_resolved) + + try: + delnode = self.restrictedTraverse(del_path) + clusterfolder = self.restrictedTraverse(path) + clusterfolder.manage_delObjects(delnode[0]) + except Exception, e: + luci_log.debug_verbose('ND9: error deleting %s: %s' \ + % (del_path, str(e))) + + try: + set_node_flag(self, clustername, rc2.hostname(), str(batch_number), NODE_DELETE, "Deleting node \'%s\'" % nodename_resolved) + except Exception, e: + luci_log.debug_verbose('ND10: failed to set flags: %s' % str(e)) + return True + def nodeTaskProcess(self, model, request): try: clustername = request['clustername'] @@ -2122,9 +2704,6 @@ return None nodename_resolved = resolve_nodename(self, clustername, nodename) - if not nodename_resolved or not nodename or not task or not clustername: - luci_log.debug('resolve_nodename failed for NTP') - return None if task != NODE_FENCE: # Fencing is the only task for which we don't @@ -2171,319 +2750,43 @@ return None if task == NODE_LEAVE_CLUSTER: - path = str(CLUSTER_FOLDER_PATH + clustername + "/" + nodename_resolved) - - try: - nodefolder = self.restrictedTraverse(path) - if not nodefolder: - raise Exception, 'cannot find directory at %s' % path - except Exception, e: - luci_log.debug('node_leave_cluster err: %s' % str(e)) + if nodeLeave(self, rc, clustername, nodename_resolved) is None: + luci_log.debug_verbose('NTP: nodeLeave failed') return None - objname = str(nodename_resolved + "____flag") - - fnpresent = noNodeFlagsPresent(self, nodefolder, objname, nodename_resolved) - if fnpresent is None: - 
luci_log.debug('An error occurred while checking flags for %s' \ - % nodename_resolved) - return None - - if fnpresent == False: - luci_log.debug('flags are still present for %s -- bailing out' \ - % nodename_resolved) - return None - - batch_number, result = nodeLeaveCluster(rc) - if batch_number is None or result is None: - luci_log.debug_verbose('nodeLeaveCluster error: batch_number and/or result is None') - return None - - batch_id = str(batch_number) - objpath = str(path + "/" + objname) - - try: - nodefolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname) - #Now we need to annotate the new DB object - flag = self.restrictedTraverse(objpath) - flag.manage_addProperty(BATCH_ID, batch_id, "string") - flag.manage_addProperty(TASKTYPE, NODE_LEAVE_CLUSTER, "string") - flag.manage_addProperty(FLAG_DESC, "Node \'" + nodename + "\' leaving cluster", "string") - except: - luci_log.debug('An error occurred while setting flag %s' % objpath) - response = request.RESPONSE - #Is this correct? Should we re-direct to the cluster page? - response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clustername) + response.redirect(request['URL'] + "?pagetype=" + NODES + "&clustername=" + clustername + '&busyfirst=true') elif task == NODE_JOIN_CLUSTER: - batch_number, result = nodeJoinCluster(rc) - if batch_number is None or result is None: - luci_log.debug_verbose('nodeJoin error: batch_number and/or result is None') + if nodeJoin(self, rc, clustername, nodename_resolved) is None: + luci_log.debug_verbose('NTP: nodeJoin failed') return None - path = str(CLUSTER_FOLDER_PATH + clustername + "/" + nodename_resolved) - batch_id = str(batch_number) - objname = str(nodename_resolved + "____flag") - objpath = str(path + "/" + objname) - - try: - nodefolder = self.restrictedTraverse(path) - nodefolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname) - #Now we need to annotate the new DB object - flag = self.restrictedTraverse(objpath) - flag.manage_addProperty(BATCH_ID, batch_id, "string") - flag.manage_addProperty(TASKTYPE, NODE_JOIN_CLUSTER, "string") - flag.manage_addProperty(FLAG_DESC, "Node \'" + nodename + "\' joining cluster", "string") - except Exception, e: - luci_log.debug_verbose('nodeJoin error: creating flags at %s: %s' \ - % (path, str(e))) - response = request.RESPONSE - #Once again, is this correct? Should we re-direct to the cluster page? 
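After this change every branch of nodeTaskProcess follows the same shape: call one helper, bail out on None, then redirect to the nodes page. A hypothetical condensation of that if/elif chain as a dispatch table; the shipped code keeps the explicit chain, and the commented handler entries simply restate the helper calls shown above:

    def run_node_task(task, handlers, redirect):
        # handlers maps a task constant to a zero-argument callable.
        handler = handlers.get(task)
        if handler is None:
            return None
        if handler() is None:
            return None
        redirect()
        return True

    # handlers = {
    #   NODE_LEAVE_CLUSTER: lambda: nodeLeave(self, rc, clustername, nodename_resolved),
    #   NODE_JOIN_CLUSTER:  lambda: nodeJoin(self, rc, clustername, nodename_resolved),
    #   NODE_REBOOT:        lambda: forceNodeReboot(self, rc, clustername, nodename_resolved),
    #   NODE_FENCE:         lambda: forceNodeFence(self, clustername, nodename, nodename_resolved),
    #   NODE_DELETE:        lambda: nodeDelete(self, rc, model, clustername, nodename, nodename_resolved),
    # }
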
- response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clustername) + response.redirect(request['URL'] + "?pagetype=" + NODES + "&clustername=" + clustername + '&busyfirst=true') elif task == NODE_REBOOT: - batch_number, result = nodeReboot(rc) - if batch_number is None or result is None: - luci_log.debug_verbose('nodeReboot: batch_number and/or result is None') + if forceNodeReboot(self, rc, clustername, nodename_resolved) is None: + luci_log.debug_verbose('NTP: nodeReboot failed') return None - path = str(CLUSTER_FOLDER_PATH + clustername + "/" + nodename_resolved) - batch_id = str(batch_number) - objname = str(nodename_resolved + "____flag") - objpath = str(path + "/" + objname) - - try: - nodefolder = self.restrictedTraverse(path) - nodefolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname) - #Now we need to annotate the new DB object - flag = self.restrictedTraverse(objpath) - flag.manage_addProperty(BATCH_ID, batch_id, "string") - flag.manage_addProperty(TASKTYPE, NODE_REBOOT, "string") - flag.manage_addProperty(FLAG_DESC, "Node \'" + nodename + "\' is being rebooted", "string") - except Exception, e: - luci_log.debug_verbose('nodeReboot err: creating flags at %s: %s' \ - % (path, str(e))) - response = request.RESPONSE - #Once again, is this correct? Should we re-direct to the cluster page? - response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clustername) + response.redirect(request['URL'] + "?pagetype=" + NODES + "&clustername=" + clustername + '&busyfirst=true') elif task == NODE_FENCE: - #here, we DON'T want to open connection to node to be fenced. - path = str(CLUSTER_FOLDER_PATH + clustername) - try: - clusterfolder = self.restrictedTraverse(path) - if not clusterfolder: - raise Exception, 'no cluster folder at %s' % path - except Exception, e: - luci_log.debug('The cluster folder for %s could not be found: %s' \ - % (clustername, str(e))) - return None - - try: - nodes = clusterfolder.objectItems('Folder') - if not nodes or len(nodes) < 1: - raise Exception, 'no cluster nodes' - except Exception, e: - luci_log.debug('No cluster nodes for %s were found: %s' \ - % (clustername, str(e))) - return None - - found_one = False - for node in nodes: - if node[1].getId().find(nodename) != (-1): - continue - - try: - rc = RicciCommunicator(node[1].getId()) - if not rc: - continue - except RicciError, e: - luci_log.debug('ricci error for host %s: %s' \ - % (node[0], str(e))) - continue - except: - continue - - if not rc.authed(): - rc = None - try: - snode = getStorageNode(self, node[1].getId()) - setNodeFlag(snode, CLUSTER_NODE_NEED_AUTH) - except: - pass - - try: - setNodeFlag(node[1], CLUSTER_NODE_NEED_AUTH) - except: - pass - - continue - found_one = True - break - - if not found_one: - return None - - batch_number, result = nodeFence(rc, nodename) - if batch_number is None or result is None: - luci_log.debug_verbose('nodeFence: batch_number and/or result is None') + if forceNodeFence(self, clustername, nodename, nodename_resolved) is None: + luci_log.debug_verbose('NTP: nodeFencefailed') return None - path = str(path + "/" + nodename_resolved) - batch_id = str(batch_number) - objname = str(nodename_resolved + "____flag") - objpath = str(path + "/" + objname) - - try: - nodefolder = self.restrictedTraverse(path) - nodefolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname) - #Now we need to annotate the new DB object - flag = self.restrictedTraverse(objpath) - flag.manage_addProperty(BATCH_ID, 
batch_id, "string") - flag.manage_addProperty(TASKTYPE, NODE_FENCE, "string") - flag.manage_addProperty(FLAG_DESC, "Node \'" + nodename + "\' is being fenced", "string") - except Exception, e: - luci_log.debug_verbose('nodeFence err: creating flags at %s: %s' \ - % (path, str(e))) - response = request.RESPONSE - #Once again, is this correct? Should we re-direct to the cluster page? - response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clustername) + response.redirect(request['URL'] + "?pagetype=" + NODES + "&clustername=" + clustername + '&busyfirst=true') elif task == NODE_DELETE: - #We need to get a node name other than the node - #to be deleted, then delete the node from the cluster.conf - #and propogate it. We will need two ricci agents for this task. - - # Make sure we can find a second node before we hose anything. - path = str(CLUSTER_FOLDER_PATH + clustername) - try: - clusterfolder = self.restrictedTraverse(path) - if not clusterfolder: - raise Exception, 'no cluster folder at %s' % path - except Exception, e: - luci_log.debug_verbose('node delete error for cluster %s: %s' \ - % (clustername, str(e))) + if nodeDelete(self, rc, model, clustername, nodename, nodename_resolved) is None: + luci_log.debug_verbose('NTP: nodeDelete failed') return None - - try: - nodes = clusterfolder.objectItems('Folder') - if not nodes or len(nodes) < 1: - raise Exception, 'no cluster nodes in DB' - except Exception, e: - luci_log.debug_verbose('node delete error for cluster %s: %s' \ - % (clustername, str(e))) - - found_one = False - for node in nodes: - if node[1].getId().find(nodename) != (-1): - continue - #here we make certain the node is up... - # XXX- we should also make certain this host is still - # in the cluster we believe it is. - try: - rc2 = RicciCommunicator(node[1].getId()) - except Exception, e: - luci_log.info('ricci %s error: %s' % (node[0], str(e))) - continue - except: - continue - - if not rc2.authed(): - try: - setNodeFlag(node[1], CLUSTER_NODE_NEED_AUTH) - except: - pass - - try: - snode = getStorageNode(self, node[0]) - setNodeFlag(snode, CLUSTER_NODE_NEED_AUTH) - except: - pass - - luci_log.debug_verbose('%s is not authed' % node[0]) - rc2 = None - continue - else: - found_one = True - break - - if not found_one: - luci_log.debug_verbose('unable to find ricci node to delete %s from %s' % (nodename, clustername)) - return None - - #First, delete cluster.conf from node to be deleted. - #next, have node leave cluster. - batch_number, result = nodeLeaveCluster(rc, purge=True) - if batch_number is None or result is None: - luci_log.debug_verbose('nodeDelete: batch_number and/or result is None') - return None - - #It is not worth flagging this node in DB, as we are going - #to delete it anyway. 
Now, we need to delete node from model - #and send out new cluster.conf - delete_target = None - nodelist = model.getNodes() - find_node = lower(nodename) - for n in nodelist: - try: - if lower(n.getName()) == find_node: - delete_target = n - break - except: - continue - - if delete_target is None: - luci_log.debug_verbose('unable to find delete target for %s in %s' \ - % (nodename, clustername)) - return None - - model.deleteNode(delete_target) - - try: - str_buf = model.exportModelAsString() - if not str_buf: - raise Exception, 'model string is blank' - except Exception, e: - luci_log.debug_verbose('NTP exportModelAsString: %s' % str(e)) - return None - - # propagate the new cluster.conf via the second node - batch_number, result = setClusterConf(rc2, str(str_buf)) - if batch_number is None: - luci_log.debug_verbose('batch number is None after del node in NTP') - return None - - #Now we need to delete the node from the DB - path = str(CLUSTER_FOLDER_PATH + clustername) - del_path = str(path + "/" + nodename_resolved) - - try: - delnode = self.restrictedTraverse(del_path) - clusterfolder = self.restrictedTraverse(path) - clusterfolder.manage_delObjects(delnode[0]) - except Exception, e: - luci_log.debug_verbose('error deleting %s: %s' % (del_path, str(e))) - - batch_id = str(batch_number) - objname = str(nodename_resolved + "____flag") - objpath = str(path + "/" + objname) - - try: - clusterfolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname) - #Now we need to annotate the new DB object - flag = self.restrictedTraverse(objpath) - flag.manage_addProperty(BATCH_ID, batch_id, "string") - flag.manage_addProperty(TASKTYPE, NODE_DELETE, "string") - flag.manage_addProperty(FLAG_DESC, "Deleting node \'" + nodename + "\'", "string") - except Exception, e: - luci_log.debug_verbose('nodeDelete %s err setting flag@%s: %s' \ - % (nodename, objpath, str(e))) - response = request.RESPONSE - response.redirect(request['HTTP_REFERER'] + "&busyfirst=true") + response.redirect(request['URL'] + "?pagetype=" + NODES + "&clustername=" + clustername + '&busyfirst=true') def getNodeInfo(self, model, status, request): infohash = {} + item = None baseurl = request['URL'] nodestate = NODE_ACTIVE svclist = list() @@ -2553,25 +2856,38 @@ #return infohash infohash['d_states'] = None + + nodename_resolved = resolve_nodename(self, clustername, nodename) + if nodestate == NODE_ACTIVE or nodestate == NODE_INACTIVE: #call service module on node and find out which daemons are running - rc = RicciCommunicator(nodename) - dlist = list() - dlist.append("ccsd") - dlist.append("cman") - dlist.append("fenced") - dlist.append("rgmanager") - states = getDaemonStates(rc, dlist) - infohash['d_states'] = states + try: + rc = RicciCommunicator(nodename_resolved) + if not rc: + raise Exception, 'rc is none' + except Exception, e: + rc = None + luci_log.info('Error connecting to %s: %s' \ + % (nodename_resolved, str(e))) + + if rc is not None: + dlist = list() + dlist.append("ccsd") + dlist.append("cman") + dlist.append("fenced") + dlist.append("rgmanager") + states = getDaemonStates(rc, dlist) + infohash['d_states'] = states - infohash['logurl'] = '/luci/logs/?nodename=' + nodename + '&clustername=' + clustername + infohash['logurl'] = '/luci/logs/?nodename=' + nodename_resolved + '&clustername=' + clustername return infohash #get list of faildoms for node -def getNodesInfo(self, model,status,req): +def getNodesInfo(self, model, status, req): resultlist = list() nodelist = list() svclist = list() + #Sort into lists... 
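getNodeInfo now resolves the node name first and tolerates an unreachable ricci agent, leaving d_states as None instead of failing the whole page. The guarded query as an illustrative helper; connect and query stand in for RicciCommunicator and getDaemonStates:

    def daemon_states_for(nodename_resolved, connect, query):
        # Ask the node's agent about the four cluster daemons; a connection
        # failure simply yields None, as in the page handler above.
        try:
            rc = connect(nodename_resolved)
        except Exception:
            return None
        return query(rc, ['ccsd', 'cman', 'fenced', 'rgmanager'])
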
for item in status: if item['type'] == "node": @@ -2581,13 +2897,36 @@ else: continue + try: + clustername = req['clustername'] + if not clustername: + raise KeyError, 'clustername is blank' + except: + try: + clustername = req.form['clustername'] + raise KeyError, 'clustername is blank' + except: + try: + clustername = req.form['clusterName'] + except: + try: + clustername = model.getClusterName() + except: + luci_log.debug_verbose('GNI0: unable to determine cluster name') + return {} + + for item in nodelist: map = {} name = item['name'] map['nodename'] = name - clustername = req['clustername'] - baseurl = req['URL'] + try: + baseurl = req['URL'] + except: + baseurl = '/luci/cluster/index_html' + cfgurl = baseurl + "?" + PAGETYPE + "=" + NODE + "&" + CLUNAME + "=" + clustername + "&nodename=" + name + map['configurl'] = cfgurl map['fenceurl'] = cfgurl + "#fence" if item['clustered'] == "true": @@ -2600,7 +2939,10 @@ map['status'] = NODE_INACTIVE map['status_str'] = NODE_INACTIVE_STR - map['logurl'] = '/luci/logs?nodename=' + name + '&clustername=' + clustername + nodename_resolved = resolve_nodename(self, clustername, name) + + map['logurl'] = '/luci/logs?nodename=' + nodename_resolved + '&clustername=' + clustername + #set up URLs for dropdown menu... if map['status'] == NODE_ACTIVE: map['jl_url'] = baseurl + "?pagetype=" + NODE_PROCESS + "&task=" + NODE_LEAVE_CLUSTER + "&nodename=" + name + "&clustername=" + clustername @@ -2644,115 +2986,328 @@ def getFence(self, model, request): map = {} - fencename = request['fencedevicename'] + fencename = request['fencename'] fencedevs = model.getFenceDevices() for fencedev in fencedevs: if fencedev.getName().strip() == fencename: map = fencedev.getAttributes() + try: + map['pretty_name'] = FENCE_OPTS[fencedev.getAgentType()] + except: + map['pretty_name'] = fencedev.getAgentType() + + nodes_used = list() + nodes = model.getNodes() + for node in nodes: + flevels = node.getFenceLevels() + for flevel in flevels: #These are the method blocks... 
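getFence above maps the agent type to a display name through FENCE_OPTS and falls back to the raw agent type when the table has no entry. The same lookup as a two-branch helper; the table contents in the usage comments are made up for the example:

    def pretty_fence_name(agent_type, fence_opts):
        # Prefer the human-readable name; fall back to the agent type itself.
        try:
            return fence_opts[agent_type]
        except KeyError:
            return agent_type

    # pretty_fence_name('fence_apc', {'fence_apc': 'APC Power Device'}) -> 'APC Power Device'
    # pretty_fence_name('fence_foo', {}) -> 'fence_foo'
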
+ kids = flevel.getChildren() + for kid in kids: #These are actual devices in each level + if kid.getName().strip() == fencedev.getName().strip(): + #See if this fd already has an entry for this node + found_duplicate = False + for item in nodes_used: + if item['nodename'] == node.getName().strip(): + found_duplicate = True + if found_duplicate == True: + continue + baseurl = request['URL'] + clustername = model.getClusterName() + node_hash = {} + node_hash['nodename'] = node.getName().strip() + node_hash['nodeurl'] = baseurl + "?clustername=" + clustername + "&nodename=" + node.getName() + "&pagetype=" + NODE + nodes_used.append(node_hash) + + map['nodesused'] = nodes_used return map return map + +def getFDForInstance(fds, name): + for fd in fds: + if fd.getName().strip() == name: + return fd + + raise def getFenceInfo(self, model, request): + clustername = request['clustername'] + baseurl = request['URL'] map = {} - fencedevs = list() - level1 = list() - level2 = list() + level1 = list() #First level fence devices + level2 = list() #Second level fence devices + shared1 = list() #List of available sharable fence devs not used in level1 + shared2 = list() #List of available sharable fence devs not used in level2 map['level1'] = level1 map['level2'] = level2 - map['fencedevs'] = fencedevs - nodename = "" - if request == None: #this is being called by the fence device page - #Get list of fence devices - fds = model.getFenceDevices() - for fd in fds: - #create fencedev hashmap - if fd.isShared() == True: - fencedev = fd.getAttributes() - fencedevs.append(fencedev) - - return map + map['shared1'] = shared1 + map['shared2'] = shared2 - else: - try: - nodename = request['nodename'] - except KeyError, e: - raise GeneralError('FATAL', "Could not extract nodename from request") + major_num = 1 + minor_num = 100 + + try: + nodename = request['nodename'] + except KeyError, e: + raise GeneralError('FATAL', "Could not extract nodename from request") - #here we need to get fences for a node - just the first two levels - #then fill in two data structures with all attr's - try: - node = model.retrieveNodeByName(nodename) - except GeneralError, e: - raise GeneralError('FATAL', "Couldn't find node name in current node list") + #Here we need to get fences for a node - just the first two levels + #Each level has its own list of fence devs used in that level + #For each fence dev, a list of instance structs is appended + #In addition, for each level, a list of available but unused fence devs + #is returned. + try: + node = model.retrieveNodeByName(nodename) + except GeneralError, e: + raise GeneralError('FATAL', "Couldn't find node name in current node list") - levels = node.getFenceLevels() - len_levels = len(levels) + fds = model.getFenceDevices() - if len_levels == 0: - return map + levels = node.getFenceLevels() + len_levels = len(levels) + + if len_levels == 0: + return map - for i in xrange(2): - if not i in levels: + if len_levels >= 1: + first_level = levels[0] + kids = first_level.getChildren() + last_kid_fd = None #This is a marker for allowing multi instances + #beneath a fencedev + for kid in kids: + instance_name = kid.getName().strip() + try: + fd = getFDForInstance(fds, instance_name) + except: + fd = None #Set to None in case last time thru loop continue - fence_struct = {} - if levels[i] != None: - level = levels[i] - else: - #No more levels... 
+ + if fd is not None: + if fd.isShared() == False: #Not a shared dev...build struct and add + fencedev = {} + fencedev['prettyname'] = FENCE_OPTS[fd.getAgentType()] + fencedev['isShared'] = False + fencedev['id'] = str(major_num) + major_num = major_num + 1 + devattrs = fd.getAttributes() + kees = devattrs.keys() + for kee in kees: + fencedev[kee] = devattrs[kee] + kidattrs = kid.getAttributes() + kees = kidattrs.keys() + for kee in kees: + if kee == "name": + continue #Don't duplicate name attr + fencedev[kee] = kidattrs[kee] + #This fencedev struct is complete, and needs to be placed on the + #level1 Q. Because it is non-shared, we should set last_kid_fd + #to none. + last_kid_fd = None + level1.append(fencedev) + else: #This dev is shared + if (last_kid_fd is not None) and (fd.getName().strip() == last_kid_fd.getName().strip()): #just append a new instance struct to last_kid_fd + instance_struct = {} + instance_struct['id'] = str(minor_num) + minor_num = minor_num + 1 + kidattrs = kid.getAttributes() + kees = kidattrs.keys() + for kee in kees: + if kee == "name": + continue + instance_struct[kee] = kidattrs[kee] + #Now just add this struct to last_kid_fd and reset last_kid_fd + ilist = last_kid_fd['instance_list'] + ilist.append(instance_struct) + last_kid_fd = fd + continue + else: #Shared, but not used above...so we need a new fencedev struct + fencedev = {} + fencedev['prettyname'] = FENCE_OPTS[fd.getAgentType()] + fencedev['isShared'] = True + fencedev['cfgurl'] = baseurl + "?clustername=" + clustername + "&fencename=" + fd.getName().strip() + "&pagetype=" + FENCEDEV + fencedev['id'] = str(major_num) + major_num = major_num + 1 + inlist = list() + fencedev['instance_list'] = inlist + devattrs = fd.getAttributes() + kees = devattrs.keys() + for kee in kees: + fencedev[kee] = devattrs[kee] + instance_struct = {} + kidattrs = kid.getAttributes() + kees = kidattrs.keys() + for kee in kees: + if kee == "name": + continue + instance_struct[kee] = kidattrs[kee] + inlist.append(instance_struct) + level1.append(fencedev) + last_kid_fd = fd + continue + + #level1 list is complete now, but it is still necessary to build shared1 + sharednames = list() + for fd in fds: + isUnique = True + if fd.isShared() == False: continue - kids = level.getChildren() - if len(kids) == 0: + for fdev in level1: + if fd.getName().strip() == fdev['name']: + isUnique = False + break + if isUnique == True: + shared_struct = {} + shared_struct['name'] = fd.getName().strip() + agentname = fd.getAgentType() + shared_struct['agent'] = agentname + shared_struct['prettyname'] = FENCE_OPTS[agentname] + shared1.append(shared_struct) + + #YUK: This next section violates the DRY rule, :-( + if len_levels >= 2: + second_level = levels[1] + kids = second_level.getChildren() + last_kid_fd = None #This is a marker for allowing multi instances + #beneath a fencedev + for kid in kids: + instance_name = kid.getName().strip() + try: + fd = getFDForInstance(fds, instance_name) + except: + fd = None #Set to None in case last time thru loop continue - else: - #for each kid, - ### resolve name, find fence device - ### Add fd to list, if it is not there yet - ### determine if it is a shared fence type - ### if it is a shared device, add instance entry - fds = model.getFenceDevices() - fence_struct = None - for kid in kids: - name = kid.getName() - found_fd = False - if not i in map: - continue - for entry in map[i]: - if entry['name'] == name: - fence_struct = entry - found_fd = True - break - if found_fd == False: - for fd in fds: - if 
fd.getName() == name: #Found the fence device - fence_struct = {} - fence_struct['isShareable'] = fd.isShared() - fd_attrs = fd.getAttributes() - kees = fd_attrs.keys() - for kee in kees: - fence_struct[kee] = fd_attrs[kee] - fi_attrs = kid.getAttributes() - kees = fi_attrs.keys() - if fence_struct['isShareable'] == True: + if fd is not None: + if fd.isShared() == False: #Not a shared dev...build struct and add + fencedev = {} + fencedev['prettyname'] = FENCE_OPTS[fd.getAgentType()] + fencedev['isShared'] = False + fencedev['id'] = str(major_num) + major_num = major_num + 1 + devattrs = fd.getAttributes() + kees = devattrs.keys() + for kee in kees: + fencedev[kee] = devattrs[kee] + kidattrs = kid.getAttributes() + kees = kidattrs.keys() + for kee in kees: + if kee == "name": + continue #Don't duplicate name attr + fencedev[kee] = kidattrs[kee] + #This fencedev struct is complete, and needs to be placed on the + #level2 Q. Because it is non-shared, we should set last_kid_fd + #to none. + last_kid_fd = None + level2.append(fencedev) + else: #This dev is shared + if (last_kid_fd is not None) and (fd.getName().strip() == last_kid_fd.getName().strip()): #just append a new instance struct to last_kid_fd instance_struct = {} + instance_struct['id'] = str(minor_num) + minor_num = minor_num + 1 + kidattrs = kid.getAttributes() + kees = kidattrs.keys() for kee in kees: - instance_struct[kee] = fi_attrs[kee] - try: - check = fence_struct['instances'] - check.append(instance_struct) - except KeyError, e: - fence_struct['instances'] = list() - fence_struct['instances'].append(instance_struct) - else: #Not a shareable fence device type + if kee == "name": + continue + instance_struct[kee] = kidattrs[kee] + #Now just add this struct to last_kid_fd and reset last_kid_fd + ilist = last_kid_fd['instance_list'] + ilist.append(instance_struct) + last_kid_fd = fd + continue + else: #Shared, but not used above...so we need a new fencedev struct + fencedev = {} + fencedev['prettyname'] = FENCE_OPTS[fd.getAgentType()] + fencedev['isShared'] = True + fencedev['cfgurl'] = baseurl + "?clustername=" + clustername + "&fencename=" + fd.getName().strip() + "&pagetype=" + FENCEDEV + fencedev['id'] = str(major_num) + major_num = major_num + 1 + inlist = list() + fencedev['instance_list'] = inlist + devattrs = fd.getAttributes() + kees = devattrs.keys() for kee in kees: - fence_struct[kee] = fi_attrs[kee] - if i == 0: - level1.append(fence_struct) - else: - level2.append(fence_struct) + fencedev[kee] = devattrs[kee] + instance_struct = {} + kidattrs = kid.getAttributes() + kees = kidattrs.keys() + for kee in kees: + if kee == "name": + continue + instance_struct[kee] = kidattrs[kee] + inlist.append(instance_struct) + level2.append(fencedev) + last_kid_fd = fd + continue - return map + #level2 list is complete but like above, we need to build shared2 + sharednames = list() + for fd in fds: + isUnique = True + if fd.isShared() == False: + continue + for fdev in level2: + if fd.getName.strip() == fdev['name']: + isUnique = False + break + if isUnique == True: + shared_struct = {} + shared_struct['name'] = fd.getName().strip() + agentname = fd.getAgentType() + shared_struct['agent'] = agentname + shared_struct['prettyname'] = FENCE_OPTS[agentname] + shared2.append(shared_struct) + + return map +def getFencesInfo(self, model, request): + clustername = request['clustername'] + baseurl = request['URL'] + map = {} + fencedevs = list() #This is for the fencedev list page + map['fencedevs'] = fencedevs + #Get list of fence devices + 
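Both getFence and getFencesInfo answer the question "which nodes use this device" by walking every node's fence levels and recording the node the first time one of its instances names the device. Stripped of the ModelBuilder API and the URL building, the check is roughly:

    def nodes_using_device(nodes, device_name):
        # nodes: list of {'name': ..., 'levels': [[instance_name, ...], ...]}
        used = []
        for node in nodes:
            for level in node['levels']:
                if device_name in level:
                    if node['name'] not in used:
                        used.append(node['name'])
                    break
        return used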
fds = model.getFenceDevices() + nodes_used = list() #This section determines which nodes use the dev + for fd in fds: + #create fencedev hashmap + if fd.isShared() == True: + fencedev = {} + attr_hash = fd.getAttributes() + kees = attr_hash.keys() + for kee in kees: + fencedev[kee] = attr_hash[kee] #copy attrs over + try: + fencedev['pretty_name'] = FENCE_OPTS[fd.getAgentType()] + except: + fencedev['pretty_name'] = fd.getAgentType() + #Add config url for this fencedev + fencedev['cfgurl'] = baseurl + "?clustername=" + clustername + "&fencename=" + fd.getName().strip() + "&pagetype=" + FENCEDEV + + nodes = model.getNodes() + for node in nodes: + flevels = node.getFenceLevels() + for flevel in flevels: #These are the method blocks... + kids = flevel.getChildren() + for kid in kids: #These are actual devices in each level + if kid.getName().strip() == fd.getName().strip(): + #See if this fd already has an entry for this node + found_duplicate = False + for item in nodes_used: + if item['nodename'] == node.getName().strip(): + found_duplicate = True + if found_duplicate == True: + continue + node_hash = {} + node_hash['nodename'] = node.getName().strip() + node_hash['nodeurl'] = baseurl + "?clustername=" + clustername + "&nodename=" + node.getName() + "&pagetype=" + NODE + nodes_used.append(node_hash) + + fencedev['nodesused'] = nodes_used + fencedevs.append(fencedev) + + return map + + def getLogsForNode(self, request): try: nodename = request['nodename'] @@ -2780,12 +3335,7 @@ if clustername is None: nodename_resolved = nodename else: - try: - nodename_resolved = resolve_nodename(self, clustername, nodename) - except: - luci_log.debug_verbose('Unable to resolve node name %s/%s to retrieve logging information' \ - % (nodename, clustername)) - return 'Unable to resolve node name for %s in cluster %s' % (nodename, clustername) + nodename_resolved = resolve_nodename(self, clustername, nodename) try: rc = RicciCommunicator(nodename_resolved) @@ -2838,7 +3388,7 @@ try: stringbuf = model.exportModelAsString() if not stringbuf: - raise Exception, 'model is blank' + raise Exception, 'model is blank' except Exception, e: luci_log.debug_verbose('exportModelAsString error: %s' % str(e)) return None @@ -2861,17 +3411,14 @@ def getXenVMInfo(self, model, request): try: xenvmname = request['servicename'] - except KeyError, e: + except: try: xenvmname = request.form['servicename'] except: luci_log.debug_verbose('servicename is missing from request') return {} - except: - luci_log.debug_verbose('servicename is missing from request') - return {} - try: + try: xenvm = model.retrieveXenVMsByName(xenvmname) except: luci_log.debug('An error occurred while attempting to get VM %s' \ @@ -2915,7 +3462,7 @@ try: items = clusterfolder.objectItems('ManagedSystem') if not items or len(items) < 1: - luci_log.debug_verbose('ICB3: no flags at %s for cluster %s' \ + luci_log.debug_verbose('ICB3: NOT BUSY: no flags at %s for cluster %s' \ % (cluname, path)) return map #This returns an empty map, and should indicate not busy except Exception, e: @@ -2925,7 +3472,7 @@ luci_log.debug('ICB5: An error occurred while looking for cluster %s flags at path %s' % (cluname, path)) return map - luci_log.debug_verbose('ICB6: isClusterBusy: %s is busy: %d flags' \ + luci_log.debug_verbose('ICB6: %s is busy: %d flags' \ % (cluname, len(items))) map['busy'] = "true" #Ok, here is what is going on...if there is an item, @@ -2962,27 +3509,25 @@ rc = RicciCommunicator(ricci[0]) if not rc: rc = None - raise RicciError, 'rc is None for %s' % 
ricci[0] - except RicciError, e: + luci_log.debug_verbose('ICB6b: rc is none') + except Exception, e: rc = None - luci_log.debug_verbose('ICB7: ricci returned error in iCB for %s: %s' \ + luci_log.debug_verbose('ICB7: RC: %s: %s' \ % (cluname, str(e))) - except: - rc = None - luci_log.info('ICB8: ricci connection failed for cluster %s' % cluname) batch_id = None if rc is not None: try: batch_id = item[1].getProperty(BATCH_ID) - luci_log.debug_verbose('ICB8A: got batch_id %s from %s' \ + luci_log.debug_verbose('ICB8: got batch_id %s from %s' \ % (batch_id, item[0])) except Exception, e: try: luci_log.debug_verbose('ICB8B: failed to get batch_id from %s: %s' \ % (item[0], str(e))) except: - luci_log.debug_verbose('ICB8C: failed to get batch_id from %s' % item[0]) + luci_log.debug_verbose('ICB8C: failed to get batch_id from %s' \ + % item[0]) if batch_id is not None: try: @@ -3030,18 +3575,31 @@ elif laststatus == 0: node_report['statusindex'] = 0 node_report['statusmessage'] = RICCI_CONNECT_FAILURE_MSG + PRE_INSTALL + elif laststatus == DISABLE_SVC_TASK: + node_report['statusindex'] = DISABLE_SVC_TASK + node_report['statusmessage'] = RICCI_CONNECT_FAILURE_MSG + PRE_CFG elif laststatus == REBOOT_TASK: node_report['statusindex'] = REBOOT_TASK node_report['statusmessage'] = RICCI_CONNECT_FAILURE_MSG + PRE_CFG elif laststatus == SEND_CONF: node_report['statusindex'] = SEND_CONF node_report['statusmessage'] = RICCI_CONNECT_FAILURE_MSG + PRE_JOIN + elif laststatus == ENABLE_SVC_TASK: + node_report['statusindex'] = ENABLE_SVC_TASK + node_report['statusmessage'] = RICCI_CONNECT_FAILURE_MSG + PRE_JOIN + else: + node_report['statusindex'] = 0 + node_report['statusmessage'] = RICCI_CONNECT_FAILURE_MSG + ' Install is in an unknown state.' nodereports.append(node_report) continue elif creation_status == -(INSTALL_TASK): node_report['iserror'] = True (err_code, err_msg) = extract_module_status(batch_xml, INSTALL_TASK) node_report['errormessage'] = CLUNODE_CREATE_ERRORS[INSTALL_TASK] + err_msg + elif creation_status == -(DISABLE_SVC_TASK): + node_report['iserror'] = True + (err_code, err_msg) = extract_module_status(batch_xml, DISABLE_SVC_TASK) + node_report['errormessage'] = CLUNODE_CREATE_ERRORS[DISABLE_SVC_TASK] + err_msg elif creation_status == -(REBOOT_TASK): node_report['iserror'] = True (err_code, err_msg) = extract_module_status(batch_xml, REBOOT_TASK) @@ -3050,6 +3608,10 @@ node_report['iserror'] = True (err_code, err_msg) = extract_module_status(batch_xml, SEND_CONF) node_report['errormessage'] = CLUNODE_CREATE_ERRORS[SEND_CONF] + err_msg + elif creation_status == -(ENABLE_SVC_TASK): + node_report['iserror'] = True + (err_code, err_msg) = extract_module_status(batch_xml, DISABLE_SVC_TASK) + node_report['errormessage'] = CLUNODE_CREATE_ERRORS[ENABLE_SVC_TASK] + err_msg elif creation_status == -(START_NODE): node_report['iserror'] = True (err_code, err_msg) = extract_module_status(batch_xml, START_NODE) @@ -3057,7 +3619,13 @@ else: node_report['iserror'] = True node_report['errormessage'] = CLUNODE_CREATE_ERRORS[0] - clusterfolder.manage_delObjects(item[0]) + + try: + clusterfolder.manage_delObjects(item[0]) + except Exception, e: + luci_log.debug_verbose('ICB14: delObjects: %s: %s' \ + % (item[0], str(e))) + nodereports.append(node_report) continue else: #either batch completed successfully, or still running @@ -3069,7 +3637,7 @@ try: clusterfolder.manage_delObjects(item[0]) except Exception, e: - luci_log.info('ICB14: Unable to delete %s: %s' % (item[0], str(e))) + luci_log.info('ICB15: Unable to 
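isClusterBusy encodes node-creation progress as a signed step number: a positive creation_status means the batch is still working on (or stalled before) that step, while a negative value means that step failed and the matching CLUNODE_CREATE_ERRORS prefix is shown. A small, hypothetical illustration of that convention, assuming the task constants defined later in this patch:

    def classify_creation_status(status):
        # Returns (is_error, step_index); negative values flag a failed step.
        if status < 0:
            return True, -status
        return False, status

For example, classify_creation_status(-REBOOT_TASK) would report an error in step 3, the reboot stage.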
delete %s: %s' % (item[0], str(e))) continue else: map['busy'] = "true" @@ -3079,23 +3647,41 @@ nodereports.append(node_report) propslist = list() propslist.append(LAST_STATUS) - item[1].manage_delProperties(propslist) - item[1].manage_addProperty(LAST_STATUS,creation_status, "int") + try: + item[1].manage_delProperties(propslist) + item[1].manage_addProperty(LAST_STATUS, creation_status, "int") + except Exception, e: + luci_log.debug_verbose('ICB16: last_status err: %s %d: %s' \ + % (item[0], creation_status, str(e))) continue else: node_report = {} node_report['isnodecreation'] = False ricci = item[0].split("____") #This removes the 'flag' suffix - rc = RicciCommunicator(ricci[0]) - finished = checkBatch(rc, item[1].getProperty(BATCH_ID)) + + try: + rc = RicciCommunicator(ricci[0]) + except Exception, e: + rc = None + finished = False + luci_log.debug_verbose('ICB15: ricci error: %s: %s' \ + % (ricci[0], str(e))) + + if rc is not None: + finished = checkBatch(rc, item[1].getProperty(BATCH_ID)) + if finished == True: - node_report['desc'] = item[1].getProperty(FLAG_DESC) + REDIRECT_MSG + flag_desc = item[1].getProperty(FLAG_DESC) + if flag_desc is None: + node_report['desc'] = REDIRECT_MSG + else: + node_report['desc'] = flag_desc + REDIRECT_MSG nodereports.append(node_report) try: clusterfolder.manage_delObjects(item[0]) except Exception, e: - luci_log.info('Unable to delete %s: %s' % (item[0], str(e))) + luci_log.info('ICB16: Unable to delete %s: %s' % (item[0], str(e))) else: node_report = {} map['busy'] = "true" @@ -3106,6 +3692,7 @@ if isBusy: part1 = req['ACTUAL_URL'] part2 = req['QUERY_STRING'] + dex = part2.find("&busyfirst") if dex != (-1): tmpstr = part2[:dex] #This strips off busyfirst var @@ -3113,11 +3700,14 @@ ###FIXME - The above assumes that the 'busyfirst' query var is at the ###end of the URL... wholeurl = part1 + "?" + part2 - #map['url'] = "5, url=" + req['ACTUAL_URL'] + "?" + req['QUERY_STRING'] map['refreshurl'] = "5; url=" + wholeurl req['specialpagetype'] = "1" else: - map['refreshurl'] = '5; url=\".\"' + try: + query = req['QUERY_STRING'].replace('&busyfirst=true', '') + map['refreshurl'] = '5; url=' + req['ACTUAL_URL'] + '?' 
+ query + except: + map['refreshurl'] = '5; url=/luci/cluster?pagetype=3' return map def getClusterOS(self, rc): @@ -3145,15 +3735,12 @@ try: cluname = request['clustername'] - except KeyError, e: + except: try: cluname = request.form['clustername'] except: luci_log.debug_verbose('getResourcesInfo missing cluster name') return resList - except: - luci_log.debug_verbose('getResourcesInfo missing cluster name') - return resList for item in modelb.getResources(): itemmap = {} @@ -3167,24 +3754,22 @@ def getResourceInfo(modelb, request): if not modelb: - luci_log.debug_verbose('no modelb obj in getResourceInfo') + luci_log.debug_verbose('GRI0: no modelb object in session') return {} name = None try: name = request['resourcename'] - except KeyError, e: + except: try: name = request.form['resourcename'] except: pass - except: - pass if name is None: try: - type = request.form['type'] - if type == 'ip': + res_type = request.form['type'] + if res_type == 'ip': name = request.form['value'].strip() except: pass @@ -3195,15 +3780,12 @@ try: cluname = request['clustername'] - except KeyError, e: + except: try: cluname = request.form['clustername'] except: luci_log.debug_verbose('getResourceInfo missing cluster name') return {} - except: - luci_log.debug_verbose('getResourceInfo missing cluster name') - return {} try: baseurl = request['URL'] @@ -3225,41 +3807,47 @@ continue def delResource(self, rc, request): - errstr = 'An error occurred in while attempting to set the cluster.conf' + errstr = 'An error occurred while attempting to set the new cluster.conf' try: modelb = request.SESSION.get('model') - except: - luci_log.debug_verbose('delRes unable to extract model from SESSION') + except Exception, e: + luci_log.debug_verbose('delResource0: no model: %s' % str(e)) return errstr + name = None try: name = request['resourcename'] - except KeyError, e: + except: try: name = request.form['resourcename'] except: - luci_log.debug_verbose('delRes missing resname %s' % str(e)) - return errstr + ': ' + str(e) - except: - luci_log.debug_verbose('delRes missing resname') - return errstr + ': ' + str(e) + pass + if name is None: + luci_log.debug_verbose('delResource1: no resource name') + return errstr + ': no resource name was provided.' + + clustername = None try: clustername = request['clustername'] - except KeyError, e: + except: try: clustername = request.form['clustername'] except: - luci_log.debug_verbose('delRes missing cluster name') - return errstr + ': could not determine the cluster name.' + pass + + if clustername is None: + luci_log.debug_verbose('delResource2: no cluster name for %s' % name) + return errstr + ': could not determine the cluster name.' try: ragent = rc.hostname() if not ragent: - raise - except: - return errstr + raise Exception, 'unable to determine the hostname of the ricci agent' + except Exception, e: + luci_log.debug_verbose('delResource3: %s: %s' % (errstr, str(e))) + return errstr + ': could not determine the ricci agent hostname' resPtr = modelb.getResourcesPtr() resources = resPtr.getChildren() @@ -3272,7 +3860,7 @@ break if not found: - luci_log.debug_verbose('delRes cant find res %s' % name) + luci_log.debug_verbose('delResource4: cant find res %s' % name) return errstr + ': the specified resource was not found.' 
try: @@ -3280,36 +3868,22 @@ if not conf: raise Exception, 'model string is blank' except Exception, e: - luci_log.debug_verbose('delRes: exportModelAsString failed: %s' % str(e)) + luci_log.debug_verbose('delResource5: exportModelAsString failed: %s' \ + % str(e)) return errstr batch_number, result = setClusterConf(rc, str(conf)) if batch_number is None or result is None: - luci_log.debug_verbose('delRes: missing batch and/or result from setClusterConf') + luci_log.debug_verbose('delResource6: missing batch and/or result') return errstr - modelstr = "" - path = CLUSTER_FOLDER_PATH + str(clustername) - clusterfolder = self.restrictedTraverse(path) - batch_id = str(batch_number) - objname = str(ragent) + '____flag' - objpath = str(path + '/' + objname) - try: - clusterfolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname) - #Now we need to annotate the new DB object - flag = self.restrictedTraverse(objpath) - flag.manage_addProperty(BATCH_ID, batch_id, "string") - flag.manage_addProperty(TASKTYPE, RESOURCE_REMOVE, "string") - flag.manage_addProperty(FLAG_DESC, "Removing Resource \'" + request['resourcename'] + "\'", "string") + set_node_flag(self, clustername, ragent, str(batch_number), RESOURCE_REMOVE, "Removing Resource \'%s\'" % request['resourcename']) except Exception, e: - luci_log.debug('delRes: An error occurred while setting flag %s: %s' \ - % (objname, str(e))) - except: - luci_log.debug('delRes: An error occurred while setting flag %s' % objname) + luci_log.debug_verbose('delResource7: failed to set flags: %s' % str(e)) response = request.RESPONSE - response.redirect(request['HTTP_REFERER'] + "&busyfirst=true") + response.redirect(request['URL'] + "?pagetype=" + RESOURCES + "&clustername=" + clustername + '&busyfirst=true') def addIp(request, form=None): if form is None: @@ -3335,7 +3909,7 @@ return None else: try: - res = apply(Ip) + res = Ip() if not res: raise Exception, 'apply(Ip) is None' except Exception, e: @@ -3391,7 +3965,7 @@ return None else: try: - res = apply(Fs) + res = Fs() if not res: raise Exception, 'apply(Fs) is None' except Exception, e: @@ -3499,7 +4073,7 @@ return None else: try: - res = apply(Clusterfs) + res = Clusterfs() if not res: raise Exception, 'apply(Clusterfs) is None' except Exception, e: @@ -3586,7 +4160,7 @@ return None else: try: - res = apply(Netfs) + res = Netfs() except Exception, e: luci_log.debug_verbose('addNfsm error: %s' % str(e)) return None @@ -3681,7 +4255,7 @@ return None else: try: - res = apply(NFSClient) + res = NFSClient() except: luci_log.debug_verbose('addNfsc error: %s' % str(e)) return None @@ -3745,7 +4319,7 @@ return None else: try: - res = apply(NFSExport) + res = NFSExport() except: luci_log.debug_verbose('addNfsx error: %s', str(e)) return None @@ -3793,7 +4367,7 @@ return None else: try: - res = apply(Script) + res = Script() except Exception, e: luci_log.debug_verbose('addScr error: %s' % str(e)) return None @@ -3814,10 +4388,10 @@ luci_log.debug_verbose('addScr error: %s' % err) try: - file = form['file'].strip() - if not file: + path = form['file'].strip() + if not path: raise KeyError, 'file path is blank' - res.attr_hash['file'] = file + res.attr_hash['file'] = path except Exception, e: err = str(e) errors.append(err) @@ -3851,7 +4425,7 @@ return None else: try: - res = apply(Samba) + res = Samba() except Exception, e: luci_log.debug_verbose('addSmb error: %s' % str(e)) return None @@ -3900,7 +4474,7 @@ if not mb_nodes or not len(mb_nodes): raise Exception, 'node list is empty' except Exception, e: - 
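The resource helpers now call the resource classes directly instead of going through apply(); for a zero-argument constructor the two spellings build the same object, and apply() was removed from later Python versions. A short illustration with a stand-in class (Ip here is not the real ModelBuilder type):

    class Ip(object):
        def __init__(self):
            self.attr_hash = {}

    res = Ip()                      # same result apply(Ip) would have produced
    res.attr_hash['address'] = '10.0.0.1'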
luci_log.debug_verbose('no model builder nodes found for %s: %s' \ + luci_log.debug_verbose('RCC0: no model builder nodes found for %s: %s' \ % (str(e), clusterName)) return 'Unable to find cluster nodes for %s' % clusterName @@ -3909,17 +4483,18 @@ if not cluster_node: raise Exception, 'cluster node is none' except Exception, e: - luci_log.debug('cant find cluster node for %s: %s' + luci_log.debug('RCC1: cant find cluster node for %s: %s' % (clusterName, str(e))) return 'Unable to find an entry for %s in the Luci database.' % clusterName try: db_nodes = map(lambda x: x[0], cluster_node.objectItems('Folder')) if not db_nodes or not len(db_nodes): - raise - except: + raise Exception, 'no database nodes' + except Exception, e: # Should we just create them all? Can this even happen? - return 'Unable to find database entries for any nodes in ' + clusterName + luci_log.debug('RCC2: error: %s' % str(e)) + return 'Unable to find database entries for any nodes in %s' % clusterName same_host = lambda x, y: x == y or x[:len(y) + 1] == y + '.' or y[:len(x) + 1] == x + '.' @@ -3946,11 +4521,15 @@ messages = list() for i in missing_list: - cluster_node.delObjects([i]) - ## or alternately - #new_node = cluster_node.restrictedTraverse(i) - #setNodeFlag(self, new_node, CLUSTER_NODE_NOT_MEMBER) - messages.append('Node \"' + i + '\" is no longer in a member of cluster \"' + clusterName + '.\". It has been deleted from the management interface for this cluster.') + try: + ## or alternately + ##new_node = cluster_node.restrictedTraverse(i) + ##setNodeFlag(self, new_node, CLUSTER_NODE_NOT_MEMBER) + cluster_node.delObjects([i]) + messages.append('Node \"%s\" is no longer in a member of cluster \"%s\." It has been deleted from the management interface for this cluster.' % (i, clusterName)) + luci_log.debug_verbose('VCC3: deleted node %s' % i) + except Exception, e: + luci_log.debug_verbose('VCC4: delObjects: %s: %s' % (i, str(e))) new_flags = CLUSTER_NODE_NEED_AUTH | CLUSTER_NODE_ADDED for i in new_list: @@ -3958,69 +4537,66 @@ cluster_node.manage_addFolder(i, '__luci__:csystem:' + clusterName) new_node = cluster_node.restrictedTraverse(i) setNodeFlag(self, new_node, new_flags) - messages.append('A new node, \"' + i + ',\" is now a member of cluster \"' + clusterName + '.\" It has added to the management interface for this cluster, but you must authenticate to it in order for it to be fully functional.') - except: - messages.append('A new node, \"' + i + ',\" is now a member of cluster \"' + clusterName + ',\". but has not added to the management interface for this cluster as a result of an error creating the database entry.') + messages.append('A new cluster node, \"%s,\" is now a member of cluster \"%s.\" It has been added to the management interface for this cluster, but you must authenticate to it in order for it to be fully functional.' % (i, clusterName)) + except Exception, e: + messages.append('A new cluster node, \"%s,\" is now a member of cluster \"%s,\". but it has not been added to the management interface for this cluster as a result of an error creating a database entry for it.' 
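The node-list reconciliation compares cluster.conf node names against the names stored in the Luci database using the same_host test shown above, which treats a short host name and its fully qualified form as equal. A standalone sketch of the comparison and of building the missing/new lists (helper names are illustrative):

    def same_host(a, b):
        # "node1" and "node1.example.com" refer to the same system.
        return a == b or a.startswith(b + '.') or b.startswith(a + '.')

    def diff_node_lists(conf_nodes, db_nodes):
        missing = [d for d in db_nodes
                   if not any(same_host(d, c) for c in conf_nodes)]
        added = [c for c in conf_nodes
                 if not any(same_host(c, d) for d in db_nodes)]
        return missing, added

Entries in "missing" are deleted from the management interface; entries in "added" get new database folders that still need authentication.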
% (i, clusterName)) + luci_log.debug_verbose('VCC5: addFolder: %s/%s: %s' \ + % (clusterName, i, str(e))) return messages -def addResource(self, request, modelb, res): +def addResource(self, request, modelb, res, res_type): clustername = modelb.getClusterName() if not clustername: - raise Exception, 'cluster name from modelb.getClusterName() is blank' + luci_log.debug_verbose('addResource0: no cluname from mb') + return 'Unable to determine cluster name' rc = getRicciAgent(self, clustername) if not rc: - raise Exception, 'Unable to find a ricci agent for the %s cluster' % clustername + luci_log.debug_verbose('addResource1: unable to find a ricci agent for cluster %s' % clustername) + return 'Unable to find a ricci agent for the %s cluster' % clustername - modelb.getResourcesPtr().addChild(res) + try: + modelb.getResourcesPtr().addChild(res) + except Exception, e: + luci_log.debug_verbose('addResource2: adding the new resource failed: %s' % str(e)) + return 'Unable to add the new resource' try: conf = modelb.exportModelAsString() if not conf: raise Exception, 'model string for %s is blank' % clustername except Exception, e: - luci_log.debug_verbose('addResource: exportModelAsString err: %s' % str(e)) + luci_log.debug_verbose('addResource3: exportModelAsString : %s' \ + % str(e)) return 'An error occurred while adding this resource' try: ragent = rc.hostname() if not ragent: - luci_log.debug_verbose('missing hostname') + luci_log.debug_verbose('addResource4: missing ricci hostname') raise Exception, 'unknown ricci agent hostname' - luci_log.debug_verbose('SENDING NEW CLUSTER CONF: %s' % conf) + batch_number, result = setClusterConf(rc, str(conf)) if batch_number is None or result is None: - luci_log.debug_verbose('missing batch_number or result') - raise Exception, 'batch_number or results is None from setClusterConf' + luci_log.debug_verbose('addResource5: missing batch_number or result') + raise Exception, 'unable to save the new cluster configuration.' 
except Exception, e: + luci_log.debug_verbose('addResource6: %s' % str(e)) return 'An error occurred while propagating the new cluster.conf: %s' % str(e) - path = str(CLUSTER_FOLDER_PATH + clustername) - clusterfolder = self.restrictedTraverse(path) - batch_id = str(batch_number) - objname = str(ragent + '____flag') - objpath = str(path + '/' + objname) + if res_type != 'ip': + res_name = res.attr_hash['name'] + else: + res_name = res.attr_hash['address'] try: - clusterfolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname) - #Now we need to annotate the new DB object - flag = self.restrictedTraverse(objpath) - flag.manage_addProperty(BATCH_ID, batch_id, "string") - flag.manage_addProperty(TASKTYPE, RESOURCE_ADD, "string") - - if type != 'ip': - flag.manage_addProperty(FLAG_DESC, "Creating New Resource \'" + res.attr_hash['name'] + "\'", "string") - else: - flag.manage_addProperty(FLAG_DESC, "Creating New Resource \'" + res.attr_hash['address'] + "\'", "string") + set_node_flag(self, clustername, ragent, str(batch_number), RESOURCE_ADD, "Creating New Resource \'%s\'" % res_name) except Exception, e: - try: - luci_log.info('Unable to create flag %s: %s' % (objpath, str(e))) - except: - pass + luci_log.debug_verbose('addResource7: failed to set flags: %s' % str(e)) response = request.RESPONSE - response.redirect(request['HTTP_REFERER'] + "&busyfirst=true") + response.redirect(request['URL'] + "?pagetype=" + RESOURCES + "&clustername=" + clustername + '&busyfirst=true') def getResourceForEdit(modelb, name): resPtr = modelb.getResourcesPtr() @@ -4048,21 +4624,26 @@ clusterfolder = self.restrictedTraverse(path) objs = clusterfolder.objectItems('Folder') except Exception, e: - luci_log.info('resolve_nodename failed for %s/%s: %s' \ + luci_log.info('RNN0: error for %s/%s: %s' \ % (nodename, clustername, str(e))) + return nodename for obj in objs: - if obj[0].find(nodename) != (-1): - return obj[0] + try: + if obj[0].find(nodename) != (-1): + return obj[0] + except: + continue - luci_log.info('resolve_nodename failed for %s/%s' % (nodename, clustername)) - return None + luci_log.info('RNN1: failed for %s/%s: nothing found' \ + % (nodename, clustername)) + return nodename def noNodeFlagsPresent(self, nodefolder, flagname, hostname): try: items = nodefolder.objectItems('ManagedSystem') except: - luci_log.debug('An error occurred while trying to list flags for cluster ' + nodefolder[0]) + luci_log.debug('NNFP0: error getting flags for %s' % nodefolder[0]) return None for item in items: @@ -4071,9 +4652,10 @@ #a flag already exists... 
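resolve_nodename now degrades gracefully: it looks for a stored folder name that contains the requested node name (the database entries are usually fully qualified) and simply returns the original name when nothing matches, instead of returning None. Reduced to its core, the lookup is:

    def resolve_nodename(folder_names, nodename):
        # folder_names: ids of the node folders stored under the cluster
        for name in folder_names:
            if nodename in name:
                return name
        return nodename

This is only a sketch of the matching step; the real function takes the cluster name and traverses the Zope folder tree to obtain folder_names.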
try to delete it try: + # hostname must be a FQDN rc = RicciCommunicator(hostname) - except RicciError, e: - luci_log.info('Unable to connect to the ricci daemon: %s' % str(e)) + except Exception, e: + luci_log.info('NNFP1: ricci error %s: %s' % (hostname, str(e))) return None if not rc.authed(): @@ -4082,15 +4664,14 @@ setNodeFlag(snode, CLUSTER_NODE_NEED_AUTH) except: pass - luci_log.info('Node %s is not authenticated' % item[0]) - return None + luci_log.info('NNFP2: %s not authenticated' % item[0]) finished = checkBatch(rc, item[1].getProperty(BATCH_ID)) if finished == True: try: nodefolder.manage_delObjects(item[0]) except Exception, e: - luci_log.info('manage_delObjects for %s failed: %s' \ + luci_log.info('NNFP3: manage_delObjects for %s failed: %s' \ % (item[0], str(e))) return None return True @@ -4100,22 +4681,62 @@ return True -def getModelBuilder(rc, isVirtualized): +def getModelBuilder(self, rc, isVirtualized): try: cluster_conf_node = getClusterConf(rc) if not cluster_conf_node: - raise - except: - luci_log.debug('unable to get cluster_conf_node in getModelBuilder') + raise Exception, 'getClusterConf returned None' + except Exception, e: + luci_log.debug_verbose('GMB0: unable to get cluster_conf_node in getModelBuilder: %s' % str(e)) return None try: modelb = ModelBuilder(0, None, None, cluster_conf_node) + if not modelb: + raise Exception, 'ModelBuilder returned None' except Exception, e: try: - luci_log.debug('An error occurred while trying to get modelb for conf \"%s\": %s' % (cluster_conf_node.toxml(), str(e))) + luci_log.debug_verbose('GMB1: An error occurred while trying to get modelb for conf \"%s\": %s' % (cluster_conf_node.toxml(), str(e))) except: - pass + luci_log.debug_verbose('GMB1: ModelBuilder failed') - modelb.setIsVirtualized(isVirtualized) + if modelb: + modelb.setIsVirtualized(isVirtualized) return modelb + +def getModelForCluster(self, clustername): + rc = getRicciAgent(self, clustername) + if not rc: + luci_log.debug_verbose('GMFC0: unable to find a ricci agent for %s' \ + % clustername) + return None + + try: + model = getModelBuilder(None, rc, rc.dom0()) + if not model: + raise Exception, 'model is none' + except Exception, e: + luci_log.debug_verbose('GMFC1: unable to get model builder for %s: %s' \ + % (clustername, str(e))) + return None + + return model + +def set_node_flag(self, cluname, agent, batchid, task, desc): + path = str(CLUSTER_FOLDER_PATH + cluname) + batch_id = str(batchid) + objname = str(agent + '____flag') + + try: + clusterfolder = self.restrictedTraverse(path) + clusterfolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname) + objpath = str(path + '/' + objname) + flag = self.restrictedTraverse(objpath) + flag.manage_addProperty(BATCH_ID, batch_id, 'string') + flag.manage_addProperty(TASKTYPE, task, 'string') + flag.manage_addProperty(FLAG_DESC, desc, 'string') + except Exception, e: + errmsg = 'SNF0: error creating flag (%s,%s,%s)@%s: %s' \ + % (batch_id, task, desc, objpath, str(e)) + luci_log.debug_verbose(errmsg) + raise Exception, errmsg --- conga/luci/site/luci/Extensions/conga_constants.py 2006/10/24 16:36:23 1.19.2.1 +++ conga/luci/site/luci/Extensions/conga_constants.py 2006/11/16 19:34:53 1.19.2.2 @@ -42,6 +42,13 @@ FENCEDEV_LIST="52" FENCEDEV_CONFIG="53" FENCEDEV="54" +CLUSTER_DAEMON="55" + +#Cluster tasks +CLUSTER_STOP = '1000' +CLUSTER_START = '1001' +CLUSTER_RESTART = '1002' +CLUSTER_DELETE = '1003' #General tasks NODE_LEAVE_CLUSTER="100" @@ -55,6 +62,13 @@ MULTICAST="203" QUORUMD="204" +PROPERTIES_TAB = 
'tab' + +PROP_GENERAL_TAB = '1' +PROP_FENCE_TAB = '2' +PROP_MCAST_TAB = '3' +PROP_QDISK_TAB = '4' + PAGETYPE="pagetype" ACTIONTYPE="actiontype" TASKTYPE="tasktype" @@ -66,6 +80,9 @@ PATH_TO_PRIVKEY="/var/lib/luci/var/certs/privkey.pem" PATH_TO_CACERT="/var/lib/luci/var/certs/cacert.pem" +# Zope DB paths +CLUSTER_FOLDER_PATH = '/luci/systems/cluster/' + #Node states NODE_ACTIVE="0" NODE_INACTIVE="1" @@ -75,26 +92,36 @@ NODE_UNKNOWN_STR="Unknown State" #cluster/node create batch task index -INSTALL_TASK=1 -REBOOT_TASK=2 -SEND_CONF=3 -START_NODE=4 -RICCI_CONNECT_FAILURE=(-1000) +INSTALL_TASK = 1 +DISABLE_SVC_TASK = 2 +REBOOT_TASK = 3 +SEND_CONF = 4 +ENABLE_SVC_TASK = 5 +START_NODE = 6 +RICCI_CONNECT_FAILURE = (-1000) -RICCI_CONNECT_FAILURE_MSG="A problem was encountered connecting with this node. " +RICCI_CONNECT_FAILURE_MSG = "A problem was encountered connecting with this node. " #cluster/node create error messages -CLUNODE_CREATE_ERRORS = ["An unknown error occurred when creating this node: ", "A problem occurred when installing packages: ","A problem occurred when rebooting this node: ", "A problem occurred when propagating the configuration to this node: ", "A problem occurred when starting this node: "] +CLUNODE_CREATE_ERRORS = [ + "An unknown error occurred when creating this node: ", + "A problem occurred when installing packages: ", + "A problem occurred when disabling cluster services on this node: ", + "A problem occurred when rebooting this node: ", + "A problem occurred when propagating the configuration to this node: ", + "A problem occurred when enabling cluster services on this node: ", + "A problem occurred when starting this node: " +] #cluster/node create error status messages -PRE_INSTALL="The install state is not yet complete" -PRE_REBOOT="Installation complete, but reboot not yet complete" -PRE_CFG="Reboot stage successful, but configuration for the cluster is not yet distributed" -PRE_JOIN="Packages are installed and configuration has been distributed, but the node has not yet joined the cluster." +PRE_INSTALL = "The install state is not yet complete" +PRE_REBOOT = "Installation complete, but reboot not yet complete" +PRE_CFG = "Reboot stage successful, but configuration for the cluster is not yet distributed" +PRE_JOIN = "Packages are installed and configuration has been distributed, but the node has not yet joined the cluster." -POSSIBLE_REBOOT_MESSAGE="This node is not currently responding and is probably
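The expanded task list keeps CLUNODE_CREATE_ERRORS aligned with the task constants, so the error prefix for a failed step can be looked up by index, with entry 0 serving as the catch-all. Roughly, using the constants and list defined above (the helper itself is only an illustration):

    def creation_error(task, detail):
        # CLUNODE_CREATE_ERRORS[INSTALL_TASK] .. [START_NODE] describe the
        # individual steps; index 0 covers anything unrecognized.
        if 0 < task < len(CLUNODE_CREATE_ERRORS):
            return CLUNODE_CREATE_ERRORS[task] + detail
        return CLUNODE_CREATE_ERRORS[0] + detail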
rebooting as planned. This state should persist for 5 minutes or so..." +POSSIBLE_REBOOT_MESSAGE = "This node is not currently responding and is probably
rebooting as planned. This state should persist for 5 minutes or so..." -REDIRECT_MSG=" You will be redirected in 5 seconds. Please fasten your safety restraints." +REDIRECT_MSG = " You will be redirected in 5 seconds. Please fasten your safety restraints." # Homebase-specific constants @@ -112,7 +139,7 @@ CLUSTER_NODE_NOT_MEMBER = 0x02 CLUSTER_NODE_ADDED = 0x04 -PLONE_ROOT='luci' +PLONE_ROOT = 'luci' LUCI_DEBUG_MODE = 1 LUCI_DEBUG_VERBOSITY = 2 --- conga/luci/site/luci/Extensions/homebase_adapters.py 2006/11/01 22:06:55 1.34.2.5 +++ conga/luci/site/luci/Extensions/homebase_adapters.py 2006/11/16 19:34:53 1.34.2.6 @@ -1,23 +1,20 @@ -import string import re -import sys import os from AccessControl import getSecurityManager -from ZPublisher import HTTPRequest -import xml.dom import cgi -from ricci_defines import * +from conga_constants import PLONE_ROOT, CLUSTER_NODE_NEED_AUTH, \ + HOMEBASE_ADD_CLUSTER, HOMEBASE_ADD_CLUSTER_INITIAL, \ + HOMEBASE_ADD_SYSTEM, HOMEBASE_ADD_USER, \ + HOMEBASE_DEL_SYSTEM, HOMEBASE_DEL_USER, HOMEBASE_PERMS from ricci_bridge import getClusterConf -from ricci_communicator import RicciCommunicator -from ricci_communicator import CERTS_DIR_PATH +from ricci_communicator import RicciCommunicator, CERTS_DIR_PATH from clusterOS import resolveOSType -from conga_constants import * -from LuciSyslog import LuciSyslog, LuciSyslogError +from LuciSyslog import LuciSyslog try: luci_log = LuciSyslog() -except LuciSyslogError, e: +except: pass def siteIsSetup(self): @@ -27,8 +24,8 @@ except: pass return False -def strFilter(regex, replaceChar, str): - return re.sub(regex, replaceChar, str) +def strFilter(regex, replaceChar, arg): + return re.sub(regex, replaceChar, arg) def validateDelSystem(self, request): errors = list() @@ -74,6 +71,8 @@ try: user = self.portal_membership.getMemberById(userId) + if not user: + raise Exception, 'user %s does not exist' % userId except: return (False, {'errors': [ 'No such user: \"' + userId + '\"' ] }) @@ -138,8 +137,12 @@ rc = RicciCommunicator(host) rc.unauth() i['cur_auth'] = False - except: - pass + except Exception, e: + try: + luci_log.debug_verbose('unauth for %s failed: %s' \ + % (i['host'], str(e))) + except: + pass def nodeAuth(cluster, host, passwd): messages = list() @@ -531,7 +534,7 @@ i[1].manage_setLocalRoles(userId, roles) messages.append('Added permission for ' + userId + ' for cluster ' + i[0]) except: - errors.append('Failed to add permission for ' + userId + ' for cluster ' + i[0]) + errors.append('Failed to add permission for ' + userId + ' for cluster ' + i[0]) else: try: if user.has_role('View', i[1]): @@ -545,7 +548,7 @@ messages.append('Removed permission for ' + userId + ' for cluster ' + i[0]) except: - errors.append('Failed to remove permission for ' + userId + ' for cluster ' + i[0]) + errors.append('Failed to remove permission for ' + userId + ' for cluster ' + i[0]) storage = self.restrictedTraverse(PLONE_ROOT + '/systems/storage/objectItems')('Folder') if not '__SYSTEM' in request.form: @@ -572,7 +575,7 @@ i[1].manage_setLocalRoles(userId, roles) messages.append('Added permission for ' + userId + ' for system ' + i[0]) except: - errors.append('Failed to add permission for ' + userId + ' for system ' + i[0]) + errors.append('Failed to add permission for ' + userId + ' for system ' + i[0]) else: try: if user.has_role('View', i[1]): @@ -586,7 +589,7 @@ messages.append('Removed permission for ' + userId + ' for system ' + i[0]) except: - errors.append('Failed to remove permission for ' + userId + ' for system ' + i[0]) 
+ errors.append('Failed to remove permission for ' + userId + ' for system ' + i[0]) if len(errors) > 0: returnCode = False @@ -665,23 +668,25 @@ ] def userAuthenticated(self): - if (isAdmin(self) or getSecurityManager().getUser().has_role('Authenticated', self.restrictedTraverse(PLONE_ROOT))): - return True - + try: + if (isAdmin(self) or getSecurityManager().getUser().has_role('Authenticated', self.restrictedTraverse(PLONE_ROOT))): + return True + except Exception, e: + luci_log.debug_verbose('UA0: %s' % str(e)) return False def isAdmin(self): try: return getSecurityManager().getUser().has_role('Owner', self.restrictedTraverse(PLONE_ROOT)) - except: - pass + except Exception, e: + luci_log.debug_verbose('IA0: %s' % str(e)) return False def userIsAdmin(self, userId): try: return self.portal_membership.getMemberById(userId).has_role('Owner', self.restrictedTraverse(PLONE_ROOT)) - except: - pass + except Exception, e: + luci_log.debug_verbose('UIA0: %s: %s' % (userId, str(e))) return False def homebaseControlPost(self, request): @@ -698,15 +703,19 @@ if 'pagetype' in request.form: pagetype = int(request.form['pagetype']) else: - try: request.SESSION.set('checkRet', {}) - except: pass + try: + request.SESSION.set('checkRet', {}) + except: + pass return homebasePortal(self, request, '.', '0') try: validatorFn = formValidators[pagetype - 1] except: - try: request.SESSION.set('checkRet', {}) - except: pass + try: + request.SESSION.set('checkRet', {}) + except: + pass return homebasePortal(self, request, '.', '0') if validatorFn == validateAddClusterInitial or validatorFn == validateAddCluster: @@ -913,71 +922,111 @@ def getClusterSystems(self, clusterName): if isAdmin(self): - return self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/' + clusterName + '/objectItems')('Folder') + try: + return self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/' + clusterName + '/objectItems')('Folder') + except Exception, e: + luci_log.debug_verbose('GCS0: %s: %s' % (clusterName, str(e))) + return None try: i = getSecurityManager().getUser() if not i: - raise - except: + raise Exception, 'GCSMGU failed' + except Exception, e: + luci_log.debug_verbose('GCS1: %s: %s' % (clusterName, str(e))) return None - csystems = self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/' + clusterName + '/objectItems')('Folder') - if not csystems: + try: + csystems = self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/' + clusterName + '/objectItems')('Folder') + if not csystems or len(csystems) < 1: + return None + except Exception, e: + luci_log.debug_verbose('GCS2: %s: %s' % (clusterName, str(e))) return None allowedCSystems = list() for c in csystems: - if i.has_role('View', c[1]): - allowedCSystems.append(c) - return (c) + try: + if i.has_role('View', c[1]): + allowedCSystems.append(c) + except Exception, e: + luci_log.debug_verbose('GCS3: %s: %s: %s' \ + % (clusterName, c[0], str(e))) + + return allowedCSystems def getClusters(self): if isAdmin(self): - return self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/objectItems')('Folder') + try: + return self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/objectItems')('Folder') + except Exception, e: + luci_log.debug_verbose('GC0: %s' % str(e)) + return None try: i = getSecurityManager().getUser() if not i: - raise - except: + raise Exception, 'GSMGU failed' + except Exception, e: + luci_log.debug_verbose('GC1: %s' % str(e)) return None - clusters = self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/objectItems')('Folder') - if not clusters: + try: + 
clusters = self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/objectItems')('Folder') + if not clusters or len(clusters) < 1: + return None + except Exception, e: + luci_log.debug_verbose('GC2: %s' % str(e)) return None allowedClusters = list() for c in clusters: - if i.has_role('View', c[1]): - allowedClusters.append(c) + try: + if i.has_role('View', c[1]): + allowedClusters.append(c) + except Exception, e: + luci_log.debug_verbose('GC3: %s: %s' % (c[0], str(e))) return allowedClusters def getStorage(self): if isAdmin(self): - return self.restrictedTraverse(PLONE_ROOT + '/systems/storage/objectItems')('Folder') + try: + return self.restrictedTraverse(PLONE_ROOT + '/systems/storage/objectItems')('Folder') + except Exception, e: + luci_log.debug_verbose('GS0: %s' % str(e)) + return None + try: i = getSecurityManager().getUser() if not i: - return None - except: + raise Exception, 'GSMGU failed' + except Exception, e: + luci_log.debug_verbose('GS1: %s' % str(e)) return None - storage = self.restrictedTraverse(PLONE_ROOT + '/systems/storage/objectItems')('Folder') - if not storage: + try: + storage = self.restrictedTraverse(PLONE_ROOT + '/systems/storage/objectItems')('Folder') + if not storage or len(storage) < 1: + return None + except Exception, e: + luci_log.debug_verbose('GS2: %s' % str(e)) return None allowedStorage = list() for s in storage: - if i.has_role('View', s[1]): - allowedStorage.append(s) + try: + if i.has_role('View', s[1]): + allowedStorage.append(s) + except Exception, e: + luci_log.debug_verbose('GS3: %s' % str(e)) return allowedStorage def createSystem(self, host, passwd): try: exists = self.restrictedTraverse(PLONE_ROOT +'/systems/storage/' + host) - return 'Storage system \"' + host + '\" is already managed.' + luci_log.debug_verbose('CS0: %s already exists' % host) + return 'Storage system %s is already managed' % host except: pass @@ -986,49 +1035,52 @@ if rc is None: raise Exception, 'unknown error' except Exception, e: + luci_log.debug_verbose('CS1: %s: %s' % (host, str(e))) return 'Unable to establish a connection to the ricci agent on %s: %s' \ % (host, str(e)) try: if not rc.authed(): rc.auth(passwd) - except: - return 'Unable to communicate with the ricci agent on \"' + host + '\" for authentication' + except Exception, e: + luci_log.debug_verbose('CS2: %s: %s' % (host, str(e))) + return 'Unable to communicate with the ricci agent on %s for authentication' % host try: i = rc.authed() - except: - return 'Unable to authenticate to the ricci agent on \"' + host + '\"' + except Exception, e: + luci_log.debug_verbose('CS3 %s: %s' % (host, str(e))) + return 'Unable to authenticate to the ricci agent on %s' % host if i != True: - return 'Authentication for storage system \"' + host + '\" failed' - -# rhost = rc.system_name() -# if rhost and rhost != host and rhost[:9] != 'localhost' and rhost[:5] != '127.0': -# host = str(rhost) + return 'Authentication for storage system %s failed' % host try: - exists = self.restrictedTraverse(PLONE_ROOT +'/systems/storage/' + host) - return 'Storage system \"' + host + '\" is already managed.' 
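getClusterSystems, getClusters, and getStorage all follow the same pattern: administrators see every folder, ordinary members see only the folders on which they hold the View role, and a failure on one entry no longer hides the rest. The filtering step, in isolation and with an illustrative name:

    def filter_viewable(member, items):
        # items: (id, object) pairs as returned by objectItems('Folder')
        allowed = []
        for item_id, obj in items:
            try:
                if member.has_role('View', obj):
                    allowed.append((item_id, obj))
            except Exception:
                continue
        return allowed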
+ exists = self.restrictedTraverse(PLONE_ROOT + '/systems/storage/' + host) + luci_log.debug_verbose('CS4 %s already exists' % host) + return 'Storage system %s is already managed' % host except: pass try: ssystem = self.restrictedTraverse(PLONE_ROOT + '/systems/storage/') except Exception, e: - return 'Unable to create storage system %s: %s' % (host, str(e)) + luci_log.debug_verbose('CS5 %s: %s' % (host, str(e))) + return 'Unable to create storage system %s: %s' % host try: ssystem.manage_addFolder(host, '__luci__:system') newSystem = self.restrictedTraverse(PLONE_ROOT + '/systems/storage/' + host) except Exception, e: - return 'Unable to create storage system %s: %s' % (host, str(e)) + luci_log.debug_verbose('CS6 %s: %s' % (host, str(e))) + return 'Unable to create DB entry for storage system %s' % host try: newSystem.manage_acquiredPermissions([]) - newSystem.manage_role('View', ['Access contents information','View']) + newSystem.manage_role('View', ['Access contents information', 'View']) except Exception, e: - return 'Unable to set permissions on storage system %s: %s' % (host, str(e)) + luci_log.debug_verbose('CS7 %s: %s' % (host, str(e))) + return 'Unable to set permissions on storage system %s' % host return None @@ -1036,26 +1088,27 @@ try: sessionData = request.SESSION.get('checkRet') nodeUnauth(sessionData['requestResults']['nodeList']) - except: - pass + except Exception, e: + luci_log.debug_verbose('AMC0: %s' % str(e)) def manageCluster(self, clusterName, nodeList): clusterName = str(clusterName) - luci_log.debug_verbose('manageCluster for %s' % clusterName) try: clusters = self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/') if not clusters: raise Exception, 'cannot find the cluster entry in the DB' - except: + except Exception, e: nodeUnauth(nodeList) - return 'Unable to create cluster \"' + clusterName + '\": the cluster directory is missing.' + luci_log.debug_verbose('MC0: %s: %s' % (clusterName, str(e))) + return 'Unable to create cluster %s: the cluster directory is missing.' 
% clusterName try: newCluster = self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/' + clusterName) if newCluster: nodeUnauth(nodeList) - return 'A cluster named \"' + clusterName + '\" is already managed by Luci' + luci_log.debug_verbose('MC1: cluster %s: already exists' % clusterName) + return 'A cluster named %s is already managed by Luci' % clusterName except: pass @@ -1063,20 +1116,22 @@ clusters.manage_addFolder(clusterName, '__luci__:cluster') newCluster = self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/' + clusterName) if not newCluster: - raise Exception, 'unable to find cluster folder for %s' % clusterName + raise Exception, 'unable to create the cluster DB entry for %s' % clusterName except Exception, e: nodeUnauth(nodeList) + luci_log.debug_verbose('MC2: %s: %s' % (clusterName, str(e))) return 'Unable to create cluster %s: %s' % (clusterName, str(e)) try: newCluster.manage_acquiredPermissions([]) - newCluster.manage_role('View', ['Access Contents Information','View']) + newCluster.manage_role('View', ['Access Contents Information', 'View']) except Exception, e: + luci_log.debug_verbose('MC3: %s: %s' % (clusterName, str(e))) nodeUnauth(nodeList) try: clusters.manage_delObjects([clusterName]) - except: - pass + except Exception, e: + luci_log.debug_verbose('MC4: %s: %s' % (clusterName, str(e))) return 'Unable to set permissions on new cluster: %s: %s' % (clusterName, str(e)) try: @@ -1084,14 +1139,14 @@ if not cluster_os: raise KeyError, 'Cluster OS is blank' except KeyError, e: - luci_log.debug_verbose('Warning adding cluster %s: %s' \ - % (clusterName, str(e))) + luci_log.debug_verbose('MC5: %s: %s' % (clusterName, str(e))) cluster_os = 'rhel5' try: newCluster.manage_addProperty('cluster_os', cluster_os, 'string') - except: - pass # we were unable to set the OS property string on this cluster + except Exception, e: + luci_log.debug_verbose('MC5: %s: %s: %s' \ + % (clusterName, cluster_os, str(e))) for i in nodeList: #if 'ricci_host' in i: @@ -1103,15 +1158,19 @@ newCluster.manage_addFolder(host, '__luci__:csystem:' + clusterName) newSystem = self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/' + clusterName + '/' + host) if not newSystem: - raise Exception, 'unable to create cluster system DB entry' + raise Exception, 'unable to create cluster system DB entry for node %s' % host newSystem.manage_acquiredPermissions([]) newSystem.manage_role('View', [ 'Access contents information' , 'View' ]) except Exception, e: nodeUnauth(nodeList) try: clusters.manage_delObjects([clusterName]) - except: - pass + except Exception, e: + luci_log.debug_verbose('MC6: %s: %s: %s' \ + % (clusterName, host, str(e))) + + luci_log.debug_verbose('MC7: %s: %s: %s' \ + % (clusterName, host, str(e))) return 'Unable to create cluster node %s for cluster %s: %s' \ % (host, clusterName, str(e)) @@ -1120,6 +1179,7 @@ if not ssystem: raise Exception, 'The storage DB entry is missing' except Exception, e: + luci_log.debug_verbose('MC8: %s: %s: %s' % (clusterName, host, str(e))) return 'Error adding storage node %s: %s' % (host, str(e)) # Only add storage systems if the cluster and cluster node DB @@ -1134,22 +1194,25 @@ # It's already there, as a storage system, no problem. 
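manageCluster and createClusterSystems register each cluster and node as a Zope folder with acquisition disabled and only minimal view roles granted, rolling the cluster folder back if a node entry cannot be created. A condensed sketch of the per-folder setup, assuming a generic parent folder (ids and titles are illustrative):

    def add_restricted_folder(parent, folder_id, title):
        # Mirrors the folder setup used for cluster, node, and storage
        # entries: no acquired permissions, view-only roles.
        parent.manage_addFolder(folder_id, title)
        new_folder = parent.restrictedTraverse(folder_id)
        new_folder.manage_acquiredPermissions([])
        new_folder.manage_role('View', ['Access contents information', 'View'])
        return new_folder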
exists = self.restrictedTraverse(PLONE_ROOT + '/systems/storage/' + host) continue - except: pass + except: + pass try: ssystem.manage_addFolder(host, '__luci__:system') newSystem = self.restrictedTraverse(PLONE_ROOT + '/systems/storage/' + host) newSystem.manage_acquiredPermissions([]) newSystem.manage_role('View', [ 'Access contents information' , 'View' ]) - except: pass + except Exception, e: + luci_log.debug_verbose('MC9: %s: %s: %s' % (clusterName, host, str(e))) def createClusterSystems(self, clusterName, nodeList): try: clusterObj = self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/' + clusterName) if not clusterObj: raise Exception, 'cluster %s DB entry is missing' % clusterName - except: + except Exception, e: nodeUnauth(nodeList) + luci_log.debug_verbose('CCS0: %s: %s' % (clusterName, str(e))) return 'No cluster named \"' + clusterName + '\" is managed by Luci' for i in nodeList: @@ -1168,6 +1231,7 @@ newSystem.manage_role('View', [ 'Access contents information' , 'View' ]) except Exception, e: nodeUnauth(nodeList) + luci_log.debug_verbose('CCS1: %s: %s: %s' % (clusterName, host, str(e))) return 'Unable to create cluster node %s for cluster %s: %s' \ % (host, clusterName, str(e)) @@ -1176,8 +1240,7 @@ if not ssystem: raise Exception, 'storage DB entry is missing' except Exception, e: - luci_log.debug_verbose('Error: adding storage DB node for %s: %s' \ - % (host, str(e))) + luci_log.debug_verbose('CCS2: %s: %s' % (clusterName, host, str(e))) return # Only add storage systems if the and cluster node DB @@ -1192,14 +1255,16 @@ # It's already there, as a storage system, no problem. exists = self.restrictedTraverse(PLONE_ROOT + '/systems/storage/' + host) continue - except: pass + except: + pass try: ssystem.manage_addFolder(host, '__luci__:system') newSystem = self.restrictedTraverse(PLONE_ROOT + '/systems/storage/' + host) newSystem.manage_acquiredPermissions([]) newSystem.manage_role('View', [ 'Access contents information' , 'View' ]) - except: pass + except Exception, e: + luci_log.debug_verbose('CCS3: %s: %s' % (clusterName, host, str(e))) def delSystem(self, systemName): try: @@ -1207,6 +1272,7 @@ if not ssystem: raise Exception, 'storage DB entry is missing' except Exception, e: + luci_log.debug_verbose('delSystem0: %s: %s' % (systemName, str(e))) return 'Unable to find storage system %s: %s' % (systemName, str(e)) try: @@ -1216,27 +1282,33 @@ except Exception, e: try: ssystem.manage_delObjects([systemName]) - except: - return 'Unable to delete the storage system \"' + systemName + '\"' - luci_log.debug_verbose('ricci error for %s: %s' % (systemName, str(e))) + except Exception, e: + luci_log.debug_verbose('delSystem1: %s: %s' % (systemName, str(e))) + return 'Unable to delete the storage system %s' % systemName + luci_log.debug_verbose('delSystem2: %s: %s' % (systemName, str(e))) return # Only unauthenticate if the system isn't a member of # a managed cluster. 
cluster_info = rc.cluster_info() if not cluster_info[0]: - try: rc.unauth() - except: pass + try: + rc.unauth() + except: + pass else: try: newSystem = self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/' + cluster_info[0] + '/' + systemName) except: - try: rc.unauth() - except: pass + try: + rc.unauth() + except: + pass try: ssystem.manage_delObjects([systemName]) except Exception, e: + luci_log.debug_verbose('delSystem3: %s: %s' % (systemName, str(e))) return 'Unable to delete storage system %s: %s' \ % (systemName, str(e)) @@ -1244,9 +1316,10 @@ try: clusters = self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/') if not clusters: - raise - except: - return 'Unable to find cluster \"' + clusterName + '\"' + raise Exception, 'clusters DB entry is missing' + except Exception, e: + luci_log.debug_verbose('delCluster0: %s' % str(e)) + return 'Unable to find cluster %s' % clusterName err = delClusterSystems(self, clusterName) if err: @@ -1254,26 +1327,28 @@ try: clusters.manage_delObjects([clusterName]) - except: - return 'Unable to delete cluster \"' + clusterName + '\"' + except Exception, e: + luci_log.debug_verbose('delCluster1: %s' % str(e)) + return 'Unable to delete cluster %s' % clusterName def delClusterSystem(self, cluster, systemName): try: if not self.restrictedTraverse(PLONE_ROOT + '/systems/storage/' + systemName): raise except: + # It's not a storage system, so unauthenticate. try: rc = RicciCommunicator(systemName) rc.unauth() except Exception, e: - luci_log.debug_verbose('ricci error for %s: %s' \ + luci_log.debug_verbose('delClusterSystem0: ricci error for %s: %s' \ % (systemName, str(e))) try: cluster.manage_delObjects([systemName]) except Exception, e: err_str = 'Error deleting cluster object %s: %s' % (systemName, str(e)) - luci_log.debug_verbose(err_str) + luci_log.debug_verbose('delClusterSystem1: %s' % err_str) return err_str def delClusterSystems(self, clusterName): @@ -1285,7 +1360,7 @@ except Exception, e: luci_log.debug_verbose('delCluSysterms: error for %s: %s' \ % (clusterName, str(e))) - return 'Unable to find any systems for cluster \"' + clusterName + '\"' + return 'Unable to find any systems for cluster %s' % clusterName errors = '' for i in csystems: @@ -1297,34 +1372,65 @@ def getDefaultUser(self, request): try: user = request.form['userList'] - except: + except KeyError, e: try: user = request['user'] except: try: - user = self.portal_membership.listMembers()[0].getUserName() - except: + members = list() + members.extend(self.portal_membership.listMembers()) + members.sort() + user = members[0].getUserName() + except Exception, e: + luci_log.debug_verbose('getDefaultUser0: %s' % str(e)) user = None + if not user: + luci_log.debug_verbose('getDefaultUser1: user is none') return user def getUserPerms(self): perms = {} - for i in self.portal_membership.listMembers(): + + try: + members = list() + members.extend(self.portal_membership.listMembers()) + if len(members) < 1: + raise Exception, 'no portal members exist' + members.sort() + except Exception, e: + luci_log.debug_verbose('getUserPerms0: %s' % str(e)) + return {} + + for i in members: userName = i.getUserName() perms[userName] = {} perms[userName]['cluster'] = {} perms[userName]['storage'] = {} - clusters = self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/objectItems')('Folder') - storage = self.restrictedTraverse(PLONE_ROOT + '/systems/storage/objectItems')('Folder') + try: + clusters = self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/objectItems')('Folder') + storage = 
self.restrictedTraverse(PLONE_ROOT + '/systems/storage/objectItems')('Folder') + except Exception, e: + luci_log.debug_verbose('getUserPerms1: user %s: %s' % (userName, str(e))) + continue for c in clusters: - perms[userName]['cluster'][c[0]] = i.has_role('View', c[1]) - + try: + perms[userName]['cluster'][c[0]] = i.has_role('View', c[1]) + except Exception, e: + luci_log.debug_verbose('getUserPerms2: user %s, obj %s: %s' \ + % (userName, c[0], str(e))) + continue + for s in storage: - perms[userName]['storage'][s[0]] = i.has_role('View', s[1]) + try: + perms[userName]['storage'][s[0]] = i.has_role('View', s[1]) + except Exception, e: + luci_log.debug_verbose('getUserPerms2: user %s, obj %s: %s' \ + % (userName, s[0], str(e))) + continue return perms @@ -1397,39 +1503,52 @@ def getClusterNode(self, nodename, clustername): try: cluster_node = self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/' + str(clustername) + '/' + str(nodename)) + if not cluster_node: + raise Exception, 'cluster node is none' return cluster_node - except: + except Exception, e: + luci_log.debug_verbose('getClusterNode0: %s %s: %s' \ + % (nodename, clustername, str(e))) return None def getStorageNode(self, nodename): try: storage_node = self.restrictedTraverse(PLONE_ROOT + '/systems/storage/' + '/' + str(nodename)) + if not storage_node: + raise Exception, 'storage node is none' return storage_node - except: + except Exception, e: + luci_log.debug_verbose('getStorageNode0: %s: %s' % (nodename, str(e))) return None def testNodeFlag(node, flag_mask): try: flags = node.getProperty('flags') + if flags is None: + return False return flags & flag_mask != 0 - except: - pass + except Exception, e: + luci_log.debug_verbose('testNodeFlag0: %s' % str(e)) return False def setNodeFlag(node, flag_mask): try: flags = node.getProperty('flags') + if flags is None: + flags = 0 node.manage_changeProperties({ 'flags': flags | flag_mask }) except: try: node.manage_addProperty('flags', flag_mask, 'int') - except: - pass + except Exception, e: + luci_log.debug_verbose('setNodeFlag0: %s' % str(e)) def delNodeFlag(node, flag_mask): try: flags = node.getProperty('flags') + if flags is None: + return if flags & flag_mask != 0: node.manage_changeProperties({ 'flags': flags & ~flag_mask }) - except: - pass + except Exception, e: + luci_log.debug_verbose('delNodeFlag0: %s' % str(e)) --- conga/luci/site/luci/Extensions/ricci_bridge.py 2006/11/01 22:06:55 1.30.2.6 +++ conga/luci/site/luci/Extensions/ricci_bridge.py 2006/11/16 19:34:53 1.30.2.7 @@ -18,7 +18,7 @@ return False try: - batchid = batch.getAttribute('batch_id') + dummy = batch.getAttribute('batch_id') result = batch.getAttribute('status') except: return False @@ -28,7 +28,8 @@ return False -def addClusterNodeBatch(cluster_name, +def addClusterNodeBatch(os_str, + cluster_name, install_base, install_services, install_shared_storage, @@ -65,13 +66,31 @@ need_reboot = install_base or install_services or install_shared_storage or install_LVS if need_reboot: + batch += '' + batch += '' + batch += '' + batch += '' + if os_str == 'rhel4': + batch += '' + batch += '' + batch += '' + batch += '' + batch += '' + batch += '' + batch += '' batch += '' batch += '' batch += '' batch += '' else: - # need placeholder instead of reboot + # need 2 placeholders instead of disable services / reboot + batch += '' + batch += '' + batch += '' + batch += '' + batch += '' + batch += '' batch += '' batch += '' @@ -95,6 +114,26 @@ batch += '' batch += '' + if need_reboot: + batch += '' + batch += '' + batch += '' 
+ batch += '' + if os_str == 'rhel4': + batch += '' + batch += '' + batch += '' + batch += '' + batch += '' + batch += '' + else: + # placeholder instead of enable services + batch += '' + batch += '' + batch += '' + batch += '' + batch += '' + batch += '' batch += '' batch += '' @@ -114,13 +153,6 @@ install_LVS, upgrade_rpms): - if os_str == 'rhel5': - cluster_version = '5' - elif os_str == 'rhel4': - cluster_version = '4' - else: - cluster_version = 'unknown' - batch = '' batch += '' batch += '' @@ -149,13 +181,31 @@ need_reboot = install_base or install_services or install_shared_storage or install_LVS if need_reboot: + batch += '' + batch += '' + batch += '' + batch += '' + if os_str == 'rhel4': + batch += '' + batch += '' + batch += '' + batch += '' + batch += '' + batch += '' + batch += '' batch += '' batch += '' batch += '' batch += '' else: - # need placeholder instead of reboot + # need 2 placeholders instead of disable services / reboot + batch += '' + batch += '' + batch += '' + batch += '' + batch += '' + batch += '' batch += '' batch += '' @@ -195,6 +245,26 @@ batch += '' batch += '' + if need_reboot: + batch += '' + batch += '' + batch += '' + batch += '' + if os_str == 'rhel4': + batch += '' + batch += '' + batch += '' + batch += '' + batch += '' + batch += '' + else: + # placeholder instead of enable services + batch += '' + batch += '' + batch += '' + batch += '' + batch += '' + batch += '' batch += '' batch += '' @@ -276,7 +346,7 @@ return doc def getClusterStatusBatch(rc): - batch_str ='' + batch_str = '' ricci_xml = rc.batch_run(batch_str, async=False) if not ricci_xml or not ricci_xml.firstChild: @@ -308,7 +378,7 @@ def getNodeLogs(rc): errstr = 'log not accessible' - batch_str = '' + batch_str = '' ricci_xml = rc.batch_run(batch_str, async=False) if not ricci_xml: @@ -318,8 +388,8 @@ if not log_entries or len(log_entries) < 1: raise Exception, 'no log data is available.' 
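A note on the placeholder entries above: when no reboot is needed, addClusterNodeBatch() and createClusterBatch() still emit the same number of batch entries, substituting no-op placeholders for the disable-services/reboot and enable-services steps, presumably so that each step keeps a fixed position in the batch (status is later read back per module, cf. extract_module_status() in ricci_communicator.py). The actual entries are ricci XML module calls whose bodies were stripped from this archived message, and the real code additionally special-cases rhel4 hosts with extra service steps and matching placeholders; the sketch below only illustrates the fixed-slot idea with made-up step names.

def build_steps(need_reboot):
    # Every generated batch has the same number of entries in the same
    # order; slots that do not apply on this host become placeholders.
    steps = []
    if need_reboot:
        steps.append('disable cluster services')
        steps.append('reboot')
    else:
        # need 2 placeholders instead of disable services / reboot
        steps.append('placeholder')
        steps.append('placeholder')
    steps.append('install and configure packages')
    if need_reboot:
        steps.append('enable cluster services')
    else:
        # placeholder instead of enable services
        steps.append('placeholder')
    return steps

For example, build_steps(True) and build_steps(False) both return four entries, so a given position in the batch always refers to the same logical operation.
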
except Exception, e: - 'Error retrieving log data from %s: %s' \ - % (rc.hostname(), str(e)) + luci_log.debug_verbose('Error retrieving log data from %s: %s' \ + % (rc.hostname(), str(e))) return None time_now = time() entry = '' @@ -357,7 +427,7 @@ return entry def nodeReboot(rc): - batch_str = '' + batch_str = '' ricci_xml = rc.batch_run(batch_str) return batchAttemptResult(ricci_xml) @@ -371,13 +441,13 @@ if purge == False: purge_conf = 'false' - batch_str = '' + batch_str = '' ricci_xml = rc.batch_run(batch_str) return batchAttemptResult(ricci_xml) def nodeFence(rc, nodename): - batch_str = '' + batch_str = '' ricci_xml = rc.batch_run(batch_str) return batchAttemptResult(ricci_xml) @@ -387,28 +457,48 @@ if cluster_startup == True: cstartup = 'true' - batch_str = '' + batch_str = '' ricci_xml = rc.batch_run(batch_str) return batchAttemptResult(ricci_xml) def startService(rc, servicename, preferrednode=None): if preferrednode != None: - batch_str = '' + batch_str = '' else: - batch_str = '' + batch_str = '' ricci_xml = rc.batch_run(batch_str) return batchAttemptResult(ricci_xml) +def updateServices(rc, enable_list, disable_list): + batch = '' + + if enable_list and len(enable_list) > 0: + batch += '' + for i in enable_list: + batch += '' % str(i) + batch += '' + + if disable_list and len(disable_list) > 0: + batch += '' + for i in disable_list: + batch += '' % str(i) + batch += '' + + if batch == '': + return None + ricci_xml = rc.batch_run(batch) + return batchAttemptResult(ricci_xml) + def restartService(rc, servicename): - batch_str = '' + batch_str = '' ricci_xml = rc.batch_run(batch_str) return batchAttemptResult(ricci_xml) def stopService(rc, servicename): - batch_str = '' + batch_str = '' ricci_xml = rc.batch_run(batch_str) return batchAttemptResult(ricci_xml) @@ -463,7 +553,6 @@ return None resultlist = list() - svc_node = None for node in varnode.childNodes: if node.nodeName == 'service': svchash = {} --- conga/luci/site/luci/Extensions/ricci_communicator.py 2006/11/01 22:06:55 1.9.2.3 +++ conga/luci/site/luci/Extensions/ricci_communicator.py 2006/11/16 19:34:53 1.9.2.4 @@ -1,10 +1,8 @@ -from time import * -from socket import * +from socket import socket, ssl, AF_INET, SOCK_STREAM import xml import xml.dom from xml.dom import minidom from LuciSyslog import LuciSyslog -from HelperFunctions import access_to_host_allowed CERTS_DIR_PATH = '/var/lib/luci/var/certs/' @@ -36,7 +34,7 @@ raise RicciError, 'Error connecting to %s:%d: unknown error' \ % (self.__hostname, self.__port) - luci_log.debug_verbose('Connected to %s:%d' \ + luci_log.debug_verbose('RC:init0: Connected to %s:%d' \ % (self.__hostname, self.__port)) try: self.ss = ssl(sock, self.__privkey_file, self.__cert_file) @@ -53,7 +51,7 @@ # receive ricci header hello = self.__receive() try: - luci_log.debug_verbose('Received header from %s: \"%s\"' \ + luci_log.debug_verbose('RC:init1: Received header from %s: \"%s\"' \ % (self.__hostname, hello.toxml())) except: pass @@ -69,34 +67,34 @@ def hostname(self): - luci_log.debug_verbose('[auth %d] reported hostname = %s' \ + luci_log.debug_verbose('RC:hostname: [auth %d] reported hostname = %s' \ % (self.__authed, self.__hostname)) return self.__hostname def authed(self): - luci_log.debug_verbose('reported authed = %d for %s' \ + luci_log.debug_verbose('RC:authed: reported authed = %d for %s' \ % (self.__authed, self.__hostname)) return self.__authed def system_name(self): - luci_log.debug_verbose('[auth %d] reported system_name = %s for %s' \ + 
luci_log.debug_verbose('RC:system_name: [auth %d] reported system_name = %s for %s' \ % (self.__authed, self.__reported_hostname, self.__hostname)) return self.__reported_hostname def cluster_info(self): - luci_log.debug_verbose('[auth %d] reported cluster_info = (%s,%s) for %s' \ + luci_log.debug_verbose('RC:cluster_info: [auth %d] reported cluster_info = (%s,%s) for %s' \ % (self.__authed, self.__cluname, self.__clualias, self.__hostname)) return (self.__cluname, self.__clualias) def os(self): - luci_log.debug_verbose('[auth %d] reported system_name = %s for %s' \ + luci_log.debug_verbose('RC:os: [auth %d] reported system_name = %s for %s' \ % (self.__authed, self.__os, self.__hostname)) return self.__os def dom0(self): - luci_log.debug_verbose('[auth %d] reported system_name = %s for %s' \ + luci_log.debug_verbose('RC:dom0: [auth %d] reported system_name = %s for %s' \ % (self.__authed, self.__dom0, self.__hostname)) return self.__dom0 def auth(self, password): if self.authed(): - luci_log.debug_verbose('already authenticated to %s' \ + luci_log.debug_verbose('RC:auth0: already authenticated to %s' \ % self.__hostname) return True @@ -113,7 +111,8 @@ resp = self.__receive() self.__authed = resp.firstChild.getAttribute('authenticated') == 'true' - luci_log.debug_verbose('auth call returning %d' % self.__authed) + luci_log.debug_verbose('RC:auth1: auth call returning %d' \ + % self.__authed) return self.__authed @@ -126,26 +125,26 @@ self.__send(doc) resp = self.__receive() - luci_log.debug_verbose('trying to unauthenticate to %s' \ + luci_log.debug_verbose('RC:unauth0: trying to unauthenticate to %s' \ % self.__hostname) try: ret = resp.firstChild.getAttribute('success') - luci_log.debug_verbose('unauthenticate returned %s for %s' \ + luci_log.debug_verbose('RC:unauth1: unauthenticate returned %s for %s' \ % (ret, self.__hostname)) if ret != '0': raise Exception, 'Invalid response' except: errstr = 'Error authenticating to host %s: %s' \ % (self.__hostname, str(ret)) - luci_log.debug(errstr) + luci_log.debug_verbose('RC:unauth2:' + errstr) raise RicciError, errstr return True def process_batch(self, batch_xml, async=False): try: - luci_log.debug_verbose('auth=%d to %s for batch %s [async=%d]' \ + luci_log.debug_verbose('RC:PB0: [auth=%d] to %s for batch %s [async=%d]' \ % (self.__authed, self.__hostname, batch_xml.toxml(), async)) except: pass @@ -171,7 +170,7 @@ try: self.__send(doc) except Exception, e: - luci_log.debug('Error sending XML \"%s\" to host %s' \ + luci_log.debug_verbose('RC:PB1: Error sending XML \"%s\" to host %s' \ % (doc.toxml(), self.__hostname)) raise RicciError, 'Error sending XML to host %s: %s' \ % (self.__hostname, str(e)) @@ -181,13 +180,13 @@ # receive response doc = self.__receive() try: - luci_log.debug_verbose('received from %s XML \"%s\"' \ + luci_log.debug_verbose('RC:PB2: received from %s XML \"%s\"' \ % (self.__hostname, doc.toxml())) except: pass if doc.firstChild.getAttribute('success') != '0': - luci_log.debug_verbose('batch command failed') + luci_log.debug_verbose('RC:PB3: batch command failed') raise RicciError, 'The last ricci command to host %s failed' \ % self.__hostname @@ -197,7 +196,7 @@ if node.nodeName == 'batch': batch_node = node.cloneNode(True) if batch_node == None: - luci_log.debug_verbose('batch node missing ') + luci_log.debug_verbose('RC:PB4: batch node missing ') raise RicciError, 'missing in ricci\'s response from %s' \ % self.__hostname @@ -206,23 +205,23 @@ def batch_run(self, batch_str, async=True): try: batch_xml_str = '' + 
batch_str + '' - luci_log.debug_verbose('attempting batch \"%s\" for host %s' \ + luci_log.debug_verbose('RC:BRun0: attempting batch \"%s\" for host %s' \ % (batch_xml_str, self.__hostname)) batch_xml = minidom.parseString(batch_xml_str).firstChild except Exception, e: - luci_log.debug('received invalid batch XML for %s: \"%s\"' \ - % (self.__hostname, batch_xml_str)) + luci_log.debug_verbose('RC:BRun1: received invalid batch XML for %s: \"%s\": %s' \ + % (self.__hostname, batch_xml_str, str(e))) raise RicciError, 'batch XML is malformed' try: ricci_xml = self.process_batch(batch_xml, async) try: - luci_log.debug_verbose('received XML \"%s\" from host %s in response to batch command.' \ + luci_log.debug_verbose('RC:BRun2: received XML \"%s\" from host %s in response to batch command.' \ % (ricci_xml.toxml(), self.__hostname)) except: pass except: - luci_log.debug('An error occurred while trying to process the batch job: %s' % batch_xml_str) + luci_log.debug_verbose('RC:BRun3: An error occurred while trying to process the batch job: \"%s\"' % batch_xml_str) return None doc = minidom.Document() @@ -230,7 +229,7 @@ return doc def batch_report(self, batch_id): - luci_log.debug_verbose('[auth=%d] asking for batchid# %s for host %s' \ + luci_log.debug_verbose('RC:BRep0: [auth=%d] asking for batchid# %s for host %s' \ % (self.__authed, batch_id, self.__hostname)) if not self.authed(): @@ -273,7 +272,7 @@ try: pos = self.ss.write(buff) except Exception, e: - luci_log.debug('Error sending XML \"%s\" to %s: %s' \ + luci_log.debug_verbose('RC:send0: Error sending XML \"%s\" to %s: %s' \ % (buff, self.__hostname, str(e))) raise RicciError, 'write error while sending XML to host %s' \ % self.__hostname @@ -282,7 +281,7 @@ % self.__hostname buff = buff[pos:] try: - luci_log.debug_verbose('Sent XML \"%s\" to host %s' \ + luci_log.debug_verbose('RC:send1: Sent XML \"%s\" to host %s' \ % (xml_doc.toxml(), self.__hostname)) except: pass @@ -304,19 +303,19 @@ # we haven't received all of the XML data yet. continue except Exception, e: - luci_log.debug('Error reading data from %s: %s' \ + luci_log.debug_verbose('RC:recv0: Error reading data from %s: %s' \ % (self.__hostname, str(e))) raise RicciError, 'Error reading data from host %s' % self.__hostname except: raise RicciError, 'Error reading data from host %s' % self.__hostname - luci_log.debug_verbose('Received XML \"%s\" from host %s' \ + luci_log.debug_verbose('RC:recv1: Received XML \"%s\" from host %s' \ % (xml_in, self.__hostname)) try: if doc == None: doc = minidom.parseString(xml_in) except Exception, e: - luci_log.debug('Error parsing XML \"%s" from %s' \ + luci_log.debug_verbose('RC:recv2: Error parsing XML \"%s" from %s' \ % (xml_in, str(e))) raise RicciError, 'Error parsing XML from host %s: %s' \ % (self.__hostname, str(e)) @@ -328,7 +327,7 @@ try: if doc.firstChild.nodeName != 'ricci': - luci_log.debug('Expecting \"ricci\" got XML \"%s\" from %s' % + luci_log.debug_verbose('RC:recv3: Expecting \"ricci\" got XML \"%s\" from %s' % (xml_in, self.__hostname)) raise Exception, 'Expecting first XML child node to be \"ricci\"' except Exception, e: @@ -346,7 +345,7 @@ try: return RicciCommunicator(hostname) except Exception, e: - luci_log.debug('Error creating a ricci connection to %s: %s' \ + luci_log.debug_verbose('RC:GRC0: Error creating a ricci connection to %s: %s' \ % (hostname, str(e))) return None pass @@ -396,7 +395,7 @@ def batch_status(batch_xml): if batch_xml.nodeName != 'batch': try: - luci_log.debug('Expecting an XML batch node. 
Got \"%s\"' \ + luci_log.debug_verbose('RC:BS0: Expecting an XML batch node. Got \"%s\"' \ % batch_xml.toxml()) except: pass @@ -416,10 +415,10 @@ last = last + 1 last = last - 2 * last try: - luci_log.debug_verbose('Returning (%d, %d) for batch_status(\"%s\")' \ + luci_log.debug_verbose('RC:BS1: Returning (%d, %d) for batch_status(\"%s\")' \ % (last, total, batch_xml.toxml())) except: - luci_log.debug_verbose('Returning last, total') + luci_log.debug_verbose('RC:BS2: Returning last, total') return (last, total) @@ -445,7 +444,7 @@ # * error_msg: error message def extract_module_status(batch_xml, module_num=1): if batch_xml.nodeName != 'batch': - luci_log.debug('Expecting \"batch\" got \"%s\"' % batch_xml.toxml()) + luci_log.debug_verbose('RC:EMS0: Expecting \"batch\" got \"%s\"' % batch_xml.toxml()) raise RicciError, 'Invalid XML node; expecting a batch node' c = 0 --- conga/make/version.in 2006/11/01 23:11:25 1.21.2.4 +++ conga/make/version.in 2006/11/16 19:34:53 1.21.2.5 @@ -1,2 +1,2 @@ VERSION=0.8 -RELEASE=23 +RELEASE=24 --- conga/ricci/modules/log/LogParser.cpp 2006/10/23 21:13:21 1.6.2.1 +++ conga/ricci/modules/log/LogParser.cpp 2006/11/16 19:34:53 1.6.2.2 @@ -165,7 +165,8 @@ set& get_files(const String& path_, - set& files) + set& files, + time_t age_time) { String path = utils::rstrip(utils::strip(path_), "/"); if (path.empty() || path.find_first_of(" ; & $ ` ? > < ' \" ; | \\ * \n \t") != path.npos) @@ -178,11 +179,12 @@ // throw String("unable to stat ") + path; return files; if (S_ISREG(st.st_mode)) { - files.insert(path); + if (st.st_mtime >= age_time) + files.insert(path); // get rotated logs for (int i=0; i<25; i++) - get_files(path + "." + utils::to_string(i), files); + get_files(path + "." + utils::to_string(i), files, age_time); return files; } else if (S_ISDIR(st.st_mode)) @@ -204,7 +206,7 @@ if (kid_path == "." || kid_path == "..") continue; kid_path = path + "/" + kid_path; - get_files(kid_path, files); + get_files(kid_path, files, age_time); } } catch ( ... ) { closedir(d); @@ -366,6 +368,13 @@ const list& paths) { set ret; + time_t age_time = time(NULL); + + if ((long long) age_time - age < 0) + age_time = 0; + else + age_time -= age; + // set of requested tags set req_tags(domains.begin(), domains.end()); @@ -375,10 +384,10 @@ for (list::const_iterator iter = paths.begin(); iter != paths.end(); iter++) - get_files(*iter, files); + get_files(*iter, files, age_time); if (files.empty()) { - get_files("/var/log/messages", files); - get_files("/var/log/syslog", files); + get_files("/var/log/messages", files, age_time); + get_files("/var/log/syslog", files, age_time); } // process log files
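
A note on the LogParser.cpp change above: get_files() now takes an age_time cutoff. The caller computes it once as time(NULL) minus the requested age, clamped at zero so an oversized age cannot go negative, and regular files whose mtime is older than the cutoff are skipped. The Python sketch below mirrors only that cutoff logic (the real C++ code also recurses into directories and into rotated logs .0 through .24); the function name and example paths are illustrative, not part of the patch.

import os
import stat
import time

def files_newer_than(paths, age_secs):
    # Compute the absolute cutoff once, clamping at 0 as the C++ code does,
    # so a very large age value cannot produce a negative cutoff time.
    cutoff = time.time() - age_secs
    if cutoff < 0:
        cutoff = 0
    selected = []
    for path in paths:
        try:
            st = os.stat(path)
        except OSError:
            continue  # unreadable or missing paths are skipped, as in get_files()
        # Keep only regular files modified at or after the cutoff.
        if stat.S_ISREG(st.st_mode) and st.st_mtime >= cutoff:
            selected.append(path)
    return selected

Called as, say, files_newer_than(['/var/log/messages', '/var/log/syslog'], 7 * 86400), it returns only the log files touched within the last week.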