From mboxrd@z Thu Jan 1 00:00:00 1970 From: Bear Yang Subject: Re: [KVM-AUTOTEST][PATCH] timedrift support Date: Mon, 11 May 2009 18:40:14 +0800 Message-ID: <4A08008E.8060105@redhat.com> References: <4A010BCD.8060307@redhat.com> <20090506130247.GA5048@amt.cnet> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="------------070902090602070109090301" Cc: uril@redhat.com, kvm@vger.kernel.org To: Marcelo Tosatti Return-path: Received: from mx2.redhat.com ([66.187.237.31]:35511 "EHLO mx2.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751142AbZEKKjv (ORCPT ); Mon, 11 May 2009 06:39:51 -0400 Received: from int-mx2.corp.redhat.com (int-mx2.corp.redhat.com [172.16.27.26]) by mx2.redhat.com (8.13.8/8.13.8) with ESMTP id n4BAdqcO020072 for ; Mon, 11 May 2009 06:39:53 -0400 In-Reply-To: <20090506130247.GA5048@amt.cnet> Sender: kvm-owner@vger.kernel.org List-ID: This is a multi-part message in MIME format. --------------070902090602070109090301 Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 7bit Hello. I have modified my script according to Marcelo's suggestions and am resubmitting it to you all. :) Marcelo, it seems that no one except you cares about my script. I still want to say that any suggestions on my script would be greatly appreciated. Thanks. Bear Marcelo Tosatti wrote: > Bear, > > Some comments below. > > On Wed, May 06, 2009 at 12:02:21PM +0800, Bear Yang wrote: > >> Hello everyone, >> >> I would like to submit a patch to add a new function for 'time drift check' for >> guests running on KVM. >> >> The TimeDrift design logic is below: >> 1. Set the host as the NTP server >> 2. The guest only syncs its clock with the host *once*, when it boots up. >> * if the offset value of ntpdate is larger than 1 sec, the guest will sync >> the clock with the host. >> * if the offset value of ntpdate is less than 1 sec, the guest doesn't need >> to sync its clock with the host. >> >> 3. Then the cpu stress test will run on the guest.
>> * a C program will give the real load to guest cpu >> 4.when the cpustress testing finished. running the commandline > -q host-ip> totally 20 times on guest to query the time from host and >> judge whether the guest clock has drift or not. >> >> The details of my patch is attached. >> >> thanks. >> >> Bear. >> > > >> diff -urN kvm_runtest_2.bak/cpu_stress.c kvm_runtest_2/cpu_stress.c >> --- kvm_runtest_2.bak/cpu_stress.c 1969-12-31 19:00:00.000000000 -0500 >> +++ kvm_runtest_2/cpu_stress.c 2009-05-05 22:35:34.000000000 -0400 >> @@ -0,0 +1,61 @@ >> +#define _GNU_SOURCE >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include >> + >> +#define MAX_CPUS 256 >> +#define BUFFSIZE 1024 >> + >> + >> +void worker_child(int cpu) >> +{ >> + int cur_freq; >> + int min_freq; >> + int max_freq; >> + int last_freq; >> + cpu_set_t mask; >> + int i; >> + double x; >> + int d = 0; >> + /* >> + * bind this thread to the specified cpu >> + */ >> + CPU_ZERO(&mask); >> + CPU_SET(cpu, &mask); >> + sched_setaffinity(0, CPU_SETSIZE, &mask); >> + >> + while (d++ != 500000) { >> + for (i=0; i<100000; i++) >> + x = sqrt(x); >> + } >> + >> + _exit(0); >> + >> +} >> + >> + >> +main() { >> + cpu_set_t mask; >> + int i; >> + int code; >> + >> + if (sched_getaffinity(0, CPU_SETSIZE, &mask) < 0){ >> + perror ("sched_getaffinity"); >> + exit(1); >> + } >> + >> + for (i=0; i> + if (CPU_ISSET(i, &mask)){ >> + printf ("CPU%d\n",i); >> + if (fork() == 0) >> + worker_child(i); >> + } >> + >> + >> + wait(&code); >> + exit (WEXITSTATUS(code)); >> +} >> diff -urN kvm_runtest_2.bak/kvm_runtest_2.py kvm_runtest_2/kvm_runtest_2.py >> --- kvm_runtest_2.bak/kvm_runtest_2.py 2009-04-29 06:17:29.000000000 -0400 >> +++ kvm_runtest_2/kvm_runtest_2.py 2009-04-29 08:06:32.000000000 -0400 >> @@ -36,6 +36,8 @@ >> "autotest": test_routine("kvm_tests", "run_autotest"), >> "kvm_install": test_routine("kvm_install", "run_kvm_install"), >> "linux_s3": test_routine("kvm_tests", 
"run_linux_s3"), >> + "ntp_server_setup": test_routine("kvm_tests", "run_ntp_server_setup"), >> + "timedrift": test_routine("kvm_tests", "run_timedrift"), >> } >> >> # Make it possible to import modules from the test's bindir >> diff -urN kvm_runtest_2.bak/kvm_tests.cfg.sample kvm_runtest_2/kvm_tests.cfg.sample >> --- kvm_runtest_2.bak/kvm_tests.cfg.sample 2009-04-29 06:17:29.000000000 -0400 >> +++ kvm_runtest_2/kvm_tests.cfg.sample 2009-04-29 08:09:36.000000000 -0400 >> @@ -81,6 +81,10 @@ >> - linux_s3: install setup >> type = linux_s3 >> >> + - ntp_server_setup: >> + type = ntp_server_setup >> + - timedrift: ntp_server_setup >> + type = timedrift >> # NICs >> variants: >> - @rtl8139: >> diff -urN kvm_runtest_2.bak/kvm_tests.py kvm_runtest_2/kvm_tests.py >> --- kvm_runtest_2.bak/kvm_tests.py 2009-04-29 06:17:29.000000000 -0400 >> +++ kvm_runtest_2/kvm_tests.py 2009-05-05 23:45:57.000000000 -0400 >> @@ -394,3 +394,235 @@ >> kvm_log.info("VM resumed after S3") >> >> session.close() >> + >> +def run_ntp_server_setup(test, params, env): >> + >> + """NTP server configuration and related network file modification >> + """ >> + kvm_log.debug("run ntp server setup") >> + status = 1 >> + # stop firewall for NTP server if it is running. 
>> + status = os.system("/etc/init.d/iptables status") >> + if status == 0: >> + os.system("/etc/init.d/iptables stop") >> + status = 1 >> + >> + # prevent dhcp client modify the ntp.conf >> + kvm_log.info("prevent dhcp client modify the ntp.conf") >> + >> + config_file = "/etc/sysconfig/network" >> + network_file = open("/etc/sysconfig/network", "a") >> + string = "PEERNTP=no" >> + >> + if os.system("grep %s %s" % (string, config_file)): >> + network_file.writelines(str(string)+'\n') >> + >> + network_file.close() >> + >> + # start ntp server on host >> + kvm_log.info("backup ntp config file") >> + >> + ntp_filename = os.path.expanduser("/etc/ntp.conf") >> + # backup ntp config file >> + backup_bootloader_filename = ntp_filename + "_backup" >> + if os.path.exists(ntp_filename): >> + os.rename(ntp_filename, backup_bootloader_filename) >> + >> + status = os.system("/etc/init.d/ntpd status") >> + if status == 0: >> + os.system("/etc/init.d/ntpd stop") >> + status = 1 >> + >> + kvm_log.info("start ntp server on host") >> + >> + ntp_cmd = ''' >> + echo "restrict default kod nomodify notrap nopeer noquery" >> /etc/ntp.conf;\ >> + echo "restrict 127.0.0.1" >> /etc/ntp.conf;\ >> + echo "driftfile /var/lib/ntp/drift" >> /etc/ntp.conf;\ >> + echo "keys /etc/ntp/keys" >> /etc/ntp.conf;\ >> + echo "server 127.127.1.0" >> /etc/ntp.conf;\ >> + echo "fudge 127.127.1.0 stratum 1" >> /etc/ntp.conf;\ >> + service ntpd start; >> + ''' >> > > I think it would be better to copy /etc/ntp.conf to a temporary file, > modify that, and start ntpd with the -c option. > > After the test is finished, restart ntpd with the default config (if it > was running) via service ntpd restart. > > Also I don't see whether your script reports the content of > > /sys/devices/system/clocksource/clocksource0/current_clocksource > > On the guest? Its important that information is displayed on the test > report. > > Looks fine to me other than that, but the kvm-autotest guys probably > have more comments. 
> > Thanks > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > --------------070902090602070109090301 Content-Type: text/x-patch; name="timedrift.patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="timedrift.patch" diff -urN kvm_runtest_2.bak/cpu_stress.c kvm_runtest_2/cpu_stress.c --- kvm_runtest_2.bak/cpu_stress.c 1969-12-31 19:00:00.000000000 -0500 +++ kvm_runtest_2/cpu_stress.c 2009-05-05 22:35:34.000000000 -0400 @@ -0,0 +1,61 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include + +#define MAX_CPUS 256 +#define BUFFSIZE 1024 + + +void worker_child(int cpu) +{ + int cur_freq; + int min_freq; + int max_freq; + int last_freq; + cpu_set_t mask; + int i; + double x; + int d = 0; + /* + * bind this thread to the specified cpu + */ + CPU_ZERO(&mask); + CPU_SET(cpu, &mask); + sched_setaffinity(0, CPU_SETSIZE, &mask); + + while (d++ != 500000) { + for (i=0; i<100000; i++) + x = sqrt(x); + } + + _exit(0); + +} + + +main() { + cpu_set_t mask; + int i; + int code; + + if (sched_getaffinity(0, CPU_SETSIZE, &mask) < 0){ + perror ("sched_getaffinity"); + exit(1); + } + + for (i=0; i> /etc/timedrift.ntp.conf;\ + echo "restrict 127.0.0.1" >> /etc/timedrift.ntp.conf;\ + echo "driftfile /var/lib/ntp/drift" >> /etc/timedrift.ntp.conf;\ + echo "keys /etc/ntp/keys" >> /etc/timedrift.ntp.conf;\ + echo "server 127.127.1.0" >> /etc/timedrift.ntp.conf;\ + echo "fudge 127.127.1.0 stratum 1" >> /etc/timedrift.ntp.conf;\ + ntpd -c /etc/timedrift.ntp.conf; + ''' + if os.system(ntp_cmd): + raise error.TestFail, "NTP server has not starting correct..." + + #kvm_log.info("sync system clock to BIOS") + #os.system("/sbin/hwclock --systohc") + +def run_timedrift(test, params, env): + """judge wether the guest clock will encounter timedrift prblem or not. 
including three stages: + 1: try to sync the clock with host, if the offset value of guest clock is large than 1 sec. + 2: running the cpu stress testing program on guest + 3: then run analyze loop totally 20 times to determine if the clock on guest has time drift. + """ + # variables using in timedrift testcase + cpu_stress_program = "cpu_stress.c" + remote_dir = '/root' + + clock_resource_cmd = "cat /sys/devices/system/clocksource/clocksource0/current_clocksource" + + pwd = os.path.join(os.environ['AUTODIR'],'tests/kvm_runtest_2') + cpu_stress_test = os.path.join(pwd, cpu_stress_program) + cpu_stress_cmdline = 'cd %s;gcc %s -lm;./a.out &' % (remote_dir, os.path.basename(cpu_stress_test)) + + cpu_stress_search_cmdline = "ps -ef|grep 'a.out'|grep -v grep" + + hostname = os.environ.get("HOSTNAME") + if "localhost.localdomain" == hostname: + hostname = os.popen('hostname').read().split('\n')[0] + kvm_log.info("since get wrong hostname from python evnironment, then use the hostname get from system call(hostname).") + + kvm_log.info("get host name :%s" % hostname) + + # ntpdate info command and ntpdate sync command + ntpdate_info_cmd = "ntpdate -q %s" % hostname + ntpdate_sync_cmd = "ntpdate %s" % hostname + + # get vm handle + vm = kvm_utils.env_get_vm(env,params.get("main_vm")) + if not vm: + raise error.TestError, "VM object not found in environment" + if not vm.is_alive(): + raise error.TestError, "VM seems to be dead; Test requires a living VM" + + kvm_log.info("Waiting for guest to be up...") + + pxssh = kvm_utils.wait_for(vm.ssh_login, 240, 0, 2) + if not pxssh: + raise error.TestFail, "Could not log into guest" + + kvm_log.info("Logged into guest IN run_timedrift function.") + + # clock resource get from host and guest + host_clock_resource = os.popen(clock_resource_cmd).read().split('\n')[0] + kvm_log.info("the clock resource on host is :%s" % host_clock_resource) + + pxssh.sendline(clock_resource_cmd) + s, o = pxssh.read_up_to_prompt() + guest_clock_resource 
= o.splitlines()[-2] + kvm_log.info("the clock resource on guest is :%s" % guest_clock_resource) + + if host_clock_resource != guest_clock_resource: + #raise error.TestFail, "Host and Guest using different clock resource" + kvm_log.info("Host and Guest using different clock resource,Let's moving on.") + else: + kvm_log.info("Host and Guest using same clock resource,Let's moving on.") + + # helper function: + # ntpdate_op: a entire process to get ntpdate command line result from guest. + # time_drift_or_not: get the numeric handing by regular expression and make timedrift calulation. + def ntpdate_op(command): + output = [] + try: + pxssh = kvm_utils.wait_for(vm.ssh_login, 240, 0, 2) + if not pxssh: + raise error.TestFail, "Could not log into guest" + + kvm_log.info("Logged in:(ntpdate_op)") + + while True: + pxssh.sendline(command) + s, output = pxssh.read_up_to_prompt() + if "time server" in output: + # output is a string contain the (ntpdate -q) infor on guest + return True, output + else: + continue + except: + pxssh.close() + return False, output + return False, output + + def time_drift_or_not(output): + date_string = re.findall(r'offset [+-]?(.*) sec', output, re.M) + num = float(date_string[0]) + if num >= 1: + kvm_log.info("guest clock has drifted in this scenario :%s %s" % (date_string, num)) + return False + else: + kvm_log.info("guest clock running veracious in now stage :%s %s" % (date_string, num)) + return True + + # send the command and get the ouput from guest + # this loop will pick out several conditions need to be process + # Actually, we want to get the info match "time server", then script can analyzing it to + # determine if guest's clock need sync with host or not. 
+ while True: + pxssh.sendline(ntpdate_info_cmd) + s, output = pxssh.read_up_to_prompt() + kvm_log.info("the ntpdate query info get from guest is below: \n%s" %output) + if ("no server suitable" not in output) and ("time server" not in output): + kvm_log.info("very creazying output got. let's try again") + continue + elif "no server suitable" in output: + kvm_log.info("seems NTP server is not ready for servicing") + time.sleep(30) + continue + elif "time server" in output: + # get the ntpdate info from guest + # kvm_log.info("Got the correct output for analyze. The output is below: \n%s" %output) + break + + kvm_log.info("get the ntpdate infomation from guest successfully :%s" % os.popen('date').read()) + + # judge the clock need to sync with host or not + while True: + date_string = re.findall(r'offset [+-]?(.*) sec', output, re.M) + num = float(date_string[0]) + if num >= 1: + kvm_log.info("guest need sync with the server: %s" % hostname) + s, output = ntpdate_op(ntpdate_sync_cmd) + if s: + continue + else: + #pxssh.sendline("hwclock --systohc") + #kvm_log.info("guest clock sync prcdure is finished. then sync the guest clock to guest bios.") + + #pxssh.sendline("hwclock --show") + #s, o = pxssh.read_up_to_prompt() + #kvm_log.info("the date infomation get from guest bios is :\n%s" % o) + + pxssh.sendline(ntpdate_info_cmd) + s, o = pxssh.read_up_to_prompt() + kvm_log.info("guest clock after sync with host is :\n%s" % o) + + break + + kvm_log.info("Timedrift Preparation *Finished* at last :%s" % os.popen('date').read()) + + if not vm.scp_to_remote(cpu_stress_test, remote_dir): + raise error.TestError, "Could not copy program to guest." 
+ + pxssh.sendline(ntpdate_info_cmd) + s, o = pxssh.read_up_to_prompt() + kvm_log.info("the ntpdate query from host *BEFORE* running the cpu stress program.\n%s" % o) + pxssh.sendline(cpu_stress_cmdline) + s, o = pxssh.read_up_to_prompt() + kvm_log.info("running command line on guest and sleeping for 1200 secs.\n%s" % o) + + time.sleep(1200) + + while True: + if pxssh.get_command_status(cpu_stress_search_cmdline): + #(s, o) = pxssh.get_command_status_output(cpu_stress_search_cmdline) + #print "s is :%s" % s + #print "o is :%s" % o + #print "--------------------------------------------" + #aaa = pxssh.get_command_status(cpu_stress_search_cmdline) + #print "aaa is :%s" % aaa + #print "--------------------------------------------" + + print "stress testing process has been completed and quit." + break + else: + print "stress testing on CPU has not finished yet.waiting for next detect after sleep 60 secs." + time.sleep(60) + continue + + pxssh.sendline(ntpdate_info_cmd) + s, o = pxssh.read_up_to_prompt() + kvm_log.info("the ntpdate query from host *AFTER* running the cpu stress program.\n%s" % o) + + pxssh.close() + + # Sleep for analyze... + kvm_log.info("sleeping(180 secs) Starting... :%s" % os.popen('date').read()) + time.sleep(180) + kvm_log.info("wakeup to get the analyzing... :%s" % os.popen('date').read()) + count = 0 + for i in range(1, 21): + kvm_log.info("this is %s time to get clock info from guest." % i) + s, o = ntpdate_op(ntpdate_info_cmd) + + if not s: + raise error.TestFail, "Guest seems hang or ssh service based on guest has been crash down" + + if not time_drift_or_not(o): + count += 1 + + if count == 5: + raise error.TestFail, "TimeDrift testing Abort because guest's clock has drift too much" + + kvm_log.info("*********************** Sleep 30 seconds for next loop *************************") + time.sleep(60) + --------------070902090602070109090301--