From mboxrd@z Thu Jan 1 00:00:00 1970 From: =?windows-1252?Q?Luk=E1=9A_Doktor?= Subject: Re: [PATCH 2/2] adds cgroup tests on KVM guests with first test Date: Thu, 03 Nov 2011 08:32:26 +0100 Message-ID: <4EB2438A.3090306@redhat.com> References: <1316708986-12045-1-git-send-email-ldoktor@redhat.com> <1316708986-12045-3-git-send-email-ldoktor@redhat.com> <4EB22EDF.6030203@redhat.com> Mime-Version: 1.0 Content-Type: text/plain; charset=windows-1252; format=flowed Content-Transfer-Encoding: QUOTED-PRINTABLE Cc: autotest@test.kernel.org, kvm@vger.kernel.org, kvm-autotest@redhat.com, akong@redhat.com, lmr@redhat.com, jzupka@redhat.com To: Suqin Return-path: Received: from mx1.redhat.com ([209.132.183.28]:62468 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751875Ab1KCHcb (ORCPT ); Thu, 3 Nov 2011 03:32:31 -0400 In-Reply-To: <4EB22EDF.6030203@redhat.com> Sender: kvm-owner@vger.kernel.org List-ID: Dne 3.11.2011 07:04, Suqin napsal(a): > On 09/23/2011 12:29 AM, Lukas Doktor wrote: >> basic structure: >> * similar to general client/tests/cgroup/ test (imports from the >> cgroup_common.py) >> * uses classes for better handling >> * improved logging and error handling >> * checks/repair the guests after each subtest >> * subtest mapping is specified in test dictionary in cgroup.py >> * allows to specify tests/repetions in tests_base.cfg >> (cgroup_tests =3D "re1[:loops] re2[:loops] ...") >> >> TestBlkioBandwidthWeight{Read,Write}: >> * Two similar tests for blkio.weight functionality inside the gues= t=20 >> using >> direct io and virtio_blk driver >> * Function: >> 1) On 2 VMs adds small (10MB) virtio_blk disk >> 2) Assigns each to different cgroup and sets blkio.weight 100/1000 >> 3) Runs dd with flag=3Ddirect (read/write) from the virtio_blk dis= k >> repeatidly >> 4) After 1 minute checks the results. 
If the ratio is better then = 1:3, >> test passes >> >> Signed-off-by: Lukas Doktor >> --- >> client/tests/kvm/subtests.cfg.sample | 7 + >> client/tests/kvm/tests/cgroup.py | 316=20 >> ++++++++++++++++++++++++++++++++++ >> 2 files changed, 323 insertions(+), 0 deletions(-) >> create mode 100644 client/tests/cgroup/__init__.py >> create mode 100644 client/tests/kvm/tests/cgroup.py >> >> diff --git a/client/tests/cgroup/__init__.py=20 >> b/client/tests/cgroup/__init__.py >> new file mode 100644 >> index 0000000..e69de29 >> diff --git a/client/tests/kvm/subtests.cfg.sample=20 >> b/client/tests/kvm/subtests.cfg.sample >> index 74e550b..79e0656 100644 >> --- a/client/tests/kvm/subtests.cfg.sample >> +++ b/client/tests/kvm/subtests.cfg.sample >> @@ -848,6 +848,13 @@ variants: >> only Linux >> type =3D iofuzz >> >> + - cgroup: >> + type =3D cgroup >> + # cgroup_tests =3D "re1[:loops] re2[:loops] ..." >> + cgroup_tests =3D ".*:1" >> + vms +=3D " vm2" >> + extra_params +=3D " -snapshot" > > you run blkio with snapshot ? sometimes we need to group diff real=20 > guests not snapshot The actual tested disks are added inside the test with the additional=20 parameter snapshot=3Doff. I'm using snapshot on the main disk only,=20 because the VM dies quite often (usually during the cleanup part). > >> + >> - virtio_console: install setup image_copy=20 >> unattended_install.cdrom >> only Linux >> vms =3D '' >> diff --git a/client/tests/kvm/tests/cgroup.py=20 >> b/client/tests/kvm/tests/cgroup.py >> new file mode 100644 >> index 0000000..4d0ec43 >> --- /dev/null >> +++ b/client/tests/kvm/tests/cgroup.py >> @@ -0,0 +1,316 @@ >> +""" >> +cgroup autotest test (on KVM guest) >> +@author: Lukas Doktor >> +@copyright: 2011 Red Hat, Inc. 
>> +""" >> +import logging, re, sys, tempfile, time, traceback >> +from autotest_lib.client.common_lib import error >> +from autotest_lib.client.bin import utils >> +from autotest_lib.client.tests.cgroup.cgroup_common import Cgroup,=20 >> CgroupModules >> + >> +def run_cgroup(test, params, env): >> + """ >> + Tests the cgroup functions on KVM guests. >> + * Uses variable tests (marked by TODO comment) to map the subt= ests >> + """ >> + vms =3D None >> + tests =3D None >> + >> + # Tests >> + class _TestBlkioBandwidth: >> + """ >> + BlkioBandwidth dummy test >> + * Use it as a base class to an actual test! >> + * self.dd_cmd and attr '_set_properties' have to be=20 >> implemented >> + * It prepares 2 vms and run self.dd_cmd to simultaniously=20 >> stress the >> + machines. After 1 minute it kills the dd and gather the= =20 >> throughput >> + informations. >> + """ >> + def __init__(self, vms, modules): >> + """ >> + Initialization >> + @param vms: list of vms >> + @param modules: initialized cgroup module class >> + """ >> + self.vms =3D vms # Virt machines >> + self.modules =3D modules # cgroup module handl= er >> + self.blkio =3D Cgroup('blkio', '') # cgroup blkio ha= ndler >> + self.files =3D [] # Temporary files (files of virt = disks) >> + self.devices =3D [] # Temporary virt devices (PCI dri= ve=20 >> 1 per vm) >> + self.dd_cmd =3D None # DD command used to test the=20 >> throughput >> + >> + def cleanup(self): >> + """ >> + Cleanup >> + """ >> + err =3D "" >> + try: >> + for i in range (2): >> + vms[i].monitor.cmd("pci_del %s" % self.devices[= i]) >> + self.files[i].close() >> + except Exception, inst: >> + err +=3D "\nCan't remove PCI drive: %s" % inst >> + try: >> + del(self.blkio) >> + except Exception, inst: >> + err +=3D "\nCan't remove Cgroup: %s" % inst >> + >> + if err: >> + logging.error("Some parts of cleanup failed:%s", er= r) >> + raise error.TestError("Some parts of cleanup=20 >> failed:%s" % err) >> + >> + def init(self): >> + """ >> + 
Initialization >> + * assigns vm1 and vm2 into cgroups and sets the proper= ties >> + * creates a new virtio device and adds it into vms >> + """ >> + if test.tagged_testname.find('virtio_blk') =3D=3D -1: >> + logging.warn("You are executing non-virtio_blk test= =20 >> but this " >> + "particular subtest uses manually adde= d " >> + "'virtio_blk' device.") > > emm... we can also run blkio test with ide. Anything except virtio_blk seems to ignore the flag=3Ddirect flag in dd= =2E I=20 tested this both manually and using this test. Anyway, if you are sure=20 it should be supported, I can use the get_device_driver() function and use=20 the tested VM's driver. > >> + if not self.dd_cmd: >> + raise error.TestError("Corrupt class, aren't you=20 >> trying to run " >> + "parent _TestBlkioBandwidth()= =20 >> function?") >> + if len(self.vms)< 2: >> + raise error.TestError("Test needs at least 2 vms.") >> + >> + # cgroups >> + pwd =3D [] >> + blkio =3D self.blkio >> + if blkio.initialize(self.modules): >> + raise error.TestError("Could not initialize blkio=20 >> Cgroup") >> + for i in range(2): >> + pwd.append(blkio.mk_cgroup()) >> + if pwd[i] =3D=3D None: >> + raise error.TestError("Can't create cgroup") >> + if blkio.set_cgroup(self.vms[i].get_shell_pid(),=20 >> pwd[i]): >> + raise error.TestError("Could not set cgroup") >> + # Move all existing threads into cgroup >> + for tmp in utils.system_output("ps -L --ppid=3D%d -= o lwp" >> + %=20 >> self.vms[i].get_shell_pid()).split('\n')[1:]: >> + if blkio.set_cgroup(int(tmp), pwd[i]): >> + raise error.TestError("Could not set cgroup= ") >> + if self.blkio.set_property("blkio.weight", 100, pwd[0])= : > > it's better to set bandwidth configurable. and there maybe more than = 2=20 > groups. Yes, I can imagine a parameter which defines the cgroup values. 
> >> + raise error.TestError("Could not set blkio.weight") >> + if self.blkio.set_property("blkio.weight", 1000, pwd[1]= ): >> + raise error.TestError("Could not set blkio.weight") >> + >> + # Add dumm drives >> + for i in range(2): >> + self.files.append(tempfile.NamedTemporaryFile( >> + prefix=3D"cgroup-disk-", >> + suffix=3D".iso")) >> + utils.system("dd if=3D/dev/zero of=3D%s bs=3D1M=20 >> count=3D10&>/dev/null" >> + % (self.files[i].name)) >> + out =3D vms[i].monitor.cmd("pci_add auto storage=20 >> file=3D%s," >> + "if=3Dvirtio,snapshot=3Doff,cache=3D= off" >> + % (self.files[i].name)) >> + out =3D re.search(r'OK domain (\d+), bus (\d+), slo= t=20 >> (\d+), ' >> + 'function \d+', out).groups() >> + self.devices.append("%s:%s:%s" % out) >> + >> + >> + def run(self): >> + """ >> + Actual test: >> + * executes self.dd_cmd simultanously on both vms. >> + """ >> + sessions =3D [] >> + out =3D [] >> + sessions.append(vms[0].wait_for_login(timeout=3D30)) >> + sessions.append(vms[1].wait_for_login(timeout=3D30)) >> + sessions.append(vms[0].wait_for_login(timeout=3D30)) >> + sessions.append(vms[1].wait_for_login(timeout=3D30)) >> + sessions[0].sendline(self.dd_cmd) >> + sessions[1].sendline(self.dd_cmd) >> + time.sleep(60) >> + >> + cmd =3D "rm -f /tmp/cgroup_lock; killall -9 dd" >> + sessions[2].sendline(cmd) >> + sessions[3].sendline(cmd) >> + re_dd =3D (r'(\d+) bytes \(\d+\.*\d* \w*\) copied,=20 >> (\d+\.*\d*) s, ' >> + '\d+\.*\d* \w./s') >> + out =3D [] >> + for i in range(2): >> + out.append(sessions[i].read_up_to_prompt()) >> + out[i] =3D [int(_[0])/float(_[1]) >> + for _ in re.findall(re_dd, out[i])[1:-1= ]] >> + logging.debug("dd(%d) output: %s", i, out[i]) >> + out[i] =3D [min(out[i]), sum(out[i])/len(out[i]),=20 >> max(out[i]), >> + len(out[i])] >> + >> + for session in sessions: >> + session.close() >> + >> + logging.debug("dd values (min,avg,max,ddloops):\nout1:=20 >> %s\nout2: %s" >> + ,out[0], out[1]) >> + >> + out1 =3D out[0][1] >> + out2 =3D out[1][1] 
>> + # In theory out1 should be 10times smaller, than out2. >> + if out1*3> out2: >> + raise error.TestFail("dd values: %s:%s (1:%f), limi= t=20 >> 1:2.5" >> + ", theoretical: 1:10" >> + % (out1, out2, out2/out1)) >> + else: >> + logging.info("dd values: %s:%s (1:%s)", out1, out2,= =20 >> out2/out1) >> + >> + >> + >> + class TestBlkioBandwidthWeigthRead(_TestBlkioBandwidth): >> + """ >> + Tests the blkio.weight capability using simultanious read o= n=20 >> 2 vms >> + """ >> + def __init__(self, vms, modules): >> + """ >> + Initialization >> + @param vms: list of vms >> + @param modules: initialized cgroup module class >> + """ >> + _TestBlkioBandwidth.__init__(self, vms, modules) >> + self.dd_cmd =3D ("export FILE=3D$(ls /dev/vd* | tail -n= 1);=20 >> touch " >> + "/tmp/cgroup_lock ; while [ -e=20 >> /tmp/cgroup_lock ];" >> + "do dd if=3D$FILE of=3D/dev/null iflag=3D= direct=20 >> bs=3D100K;" >> + "done") >> + >> + >> + class TestBlkioBandwidthWeigthWrite(_TestBlkioBandwidth): >> + """ >> + Tests the blkio.weight capability using simultanious write=20 >> on 2 vms >> + """ >> + def __init__(self, vms, modules): >> + """ >> + Initialization >> + @param vms: list of vms >> + @param modules: initialized cgroup module class >> + """ >> + _TestBlkioBandwidth.__init__(self, vms, modules) >> + self.dd_cmd =3D ('export FILE=3D$(ls /dev/vd* | tail -n= 1);=20 >> touch ' >> + '/tmp/cgroup_lock ; while [ -e=20 >> /tmp/cgroup_lock ];' >> + 'do dd if=3D/dev/zero of=3D$FILE oflag=3D= direct=20 >> bs=3D100K;' >> + 'done') >> + >> + >> + def _check_vms(vms): >> + """ >> + Checks the vitality of VM >> + @param vms: list of vm's >> + """ >> + for i in range(len(vms)): >> + vms[i].verify_alive() >> + _ =3D vms[i].wait_for_login(timeout=3D60) >> + out =3D _.cmd_output("dmesg -c") >> + _.close() >> + del(_) >> + if out.find("BUG") !=3D -1: >> + logging.error("BUG occured in dmesg:\n%s", out) >> + logging.warn("recreate VM(%s)", i) >> + # The vm have to be recreate to reset the qemu PCI=20 
>> state >> + vms[i].create() >> + >> + >> + # Setup >> + # TODO: Add all new tests here >> + tests =3D {"blkio_bandwidth_weigth_read" :=20 >> TestBlkioBandwidthWeigthRead, >> + "blkio_bandwidth_weigth_write" :=20 >> TestBlkioBandwidthWeigthWrite, >> + } >> + modules =3D CgroupModules() >> + if (modules.init(['cpuset', 'cpu', 'cpuacct', 'memory', 'device= s', >> + 'freezer', 'net_cls', 'blkio'])<=3D 0): >> + raise error.TestFail('Can\'t mount any cgroup modules') >> + # Add all vms >> + vms =3D [] >> + for vm in params.get("vms", "main_vm").split(): >> + vm =3D env.get_vm(vm) >> + vm.verify_alive() >> + timeout =3D int(params.get("login_timeout", 360)) >> + _ =3D vm.wait_for_login(timeout=3Dtimeout) >> + _.close() >> + del(_) >> + vms.append(vm) >> + >> + >> + # Execute tests >> + results =3D "" >> + # cgroup_tests =3D "re1[:loops] re2[:loops] ... ... ..." >> + for j in params.get("cgroup_tests").split(): >> + try: >> + loops =3D int(j[j.rfind(':')+1:]) >> + j =3D j[:j.rfind(':')] >> + except: >> + loops =3D 1 >> + for _loop in range(loops): >> + for i in [_ for _ in tests.keys() if re.match(j, _)]: >> + logging.info("%s: Entering the test", i) >> + try: >> + _check_vms(vms) >> + tst =3D tests[i](vms, modules) >> + tst.init() >> + tst.run() >> + except error.TestFail, inst: >> + logging.error("%s: Leaving, test FAILED=20 >> (TestFail): %s", >> + i, inst) >> + results +=3D "\n * %s: Test FAILED (TestFail): = %s"=20 >> % (i, inst) >> + try: >> + tst.cleanup() >> + except Exception, inst: >> + tmps =3D "" >> + for tmp in traceback.format_exception( >> + sys.exc_info()[0], >> + sys.exc_info()[1], >> + sys.exc_info()[2]): >> + tmps +=3D "%s cleanup: %s" % (i, tmp) >> + logging.info("%s: cleanup also failed\n%s",= =20 >> i, tmps) >> + except error.TestError, inst: >> + tmps =3D "" >> + for tmp in traceback.format_exception( >> + sys.exc_info()[0], >> + sys.exc_info()[1], >> + sys.exc_info()[2]): >> + tmps +=3D "%s: %s" % (i, tmp) >> + logging.error("%s: Leaving, test 
FAILED=20 >> (TestError): %s", >> + i, tmps) >> + results +=3D "\n * %s: Test FAILED (TestError):= =20 >> %s"% (i, inst) >> + try: >> + tst.cleanup() >> + except Exception, inst: >> + logging.warn("%s: cleanup also failed:=20 >> %s\n", i, inst) >> + except Exception, inst: >> + tmps =3D "" >> + for tmp in traceback.format_exception( >> + sys.exc_info()[0], >> + sys.exc_info()[1], >> + sys.exc_info()[2]): >> + tmps +=3D "%s: %s" % (i, tmp) >> + logging.error("%s: Leaving, test FAILED=20 >> (Exception): %s", >> + i, tmps) >> + results +=3D "\n * %s: Test FAILED (Exception):= =20 >> %s"% (i, inst) >> + try: >> + tst.cleanup() >> + except Exception, inst: >> + logging.warn("%s: cleanup also failed:=20 >> %s\n", i, inst) >> + else: >> + try: >> + tst.cleanup() >> + except Exception, inst: >> + tmps =3D "" >> + for tmp in traceback.format_exception( >> + sys.exc_info()[0], >> + sys.exc_info()[1], >> + sys.exc_info()[2]): >> + tmps +=3D "%s cleanup: %s" % (i, tmp) >> + logging.info("%s: Leaving, test passed but=20 >> cleanup " >> + "FAILED\n%s", i, tmps) >> + results +=3D ("\n * %s: Test passed but=20 >> cleanup FAILED" >> + % (i)) >> + else: >> + logging.info("%s: Leaving, test PASSED", i) >> + results +=3D "\n * %s: Test PASSED" % (i) >> + >> + logging.info("SUM: All tests finished (%d PASS / %d FAIL =3D %d= =20 >> TOTAL)%s", >> + results.count("PASSED"), results.count("FAILED"), >> + (results.count("PASSED")+results.count("FAILED")),= =20 >> results) >> + if results.count("FAILED"): >> + raise error.TestFail("Some subtests failed") >> + > Thanks for the review; this test is already upstream (with some changes).=20 I'm going to finish a couple more subtests (cpu, cpuset, memory) and then= =20 backport it into our virtlab. It should be straightforward as it's a=20 single-file test with a few additional utils.py functions. Regards, Luk=E1=9A