From mboxrd@z Thu Jan 1 00:00:00 1970 From: Suqin Subject: Re: [PATCH 2/2] adds cgroup tests on KVM guests with first test Date: Thu, 03 Nov 2011 14:04:15 +0800 Message-ID: <4EB22EDF.6030203@redhat.com> References: <1316708986-12045-1-git-send-email-ldoktor@redhat.com> <1316708986-12045-3-git-send-email-ldoktor@redhat.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Cc: kvm@vger.kernel.org, autotest@test.kernel.org, kvm-autotest@redhat.com To: Lukas Doktor Return-path: In-Reply-To: <1316708986-12045-3-git-send-email-ldoktor@redhat.com> List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: autotest-bounces@test.kernel.org Errors-To: autotest-bounces@test.kernel.org List-Id: kvm.vger.kernel.org On 09/23/2011 12:29 AM, Lukas Doktor wrote: > basic structure: > * similar to general client/tests/cgroup/ test (imports from the > cgroup_common.py) > * uses classes for better handling > * improved logging and error handling > * checks/repair the guests after each subtest > * subtest mapping is specified in test dictionary in cgroup.py > * allows to specify tests/repetions in tests_base.cfg > (cgroup_tests = "re1[:loops] re2[:loops] ...") > > TestBlkioBandwidthWeight{Read,Write}: > * Two similar tests for blkio.weight functionality inside the guest using > direct io and virtio_blk driver > * Function: > 1) On 2 VMs adds small (10MB) virtio_blk disk > 2) Assigns each to different cgroup and sets blkio.weight 100/1000 > 3) Runs dd with flag=direct (read/write) from the virtio_blk disk > repeatidly > 4) After 1 minute checks the results. 
If the ratio is better then 1:3, > test passes > > Signed-off-by: Lukas Doktor > --- > client/tests/kvm/subtests.cfg.sample | 7 + > client/tests/kvm/tests/cgroup.py | 316 ++++++++++++++++++++++++++++++++++ > 2 files changed, 323 insertions(+), 0 deletions(-) > create mode 100644 client/tests/cgroup/__init__.py > create mode 100644 client/tests/kvm/tests/cgroup.py > > diff --git a/client/tests/cgroup/__init__.py b/client/tests/cgroup/__init__.py > new file mode 100644 > index 0000000..e69de29 > diff --git a/client/tests/kvm/subtests.cfg.sample b/client/tests/kvm/subtests.cfg.sample > index 74e550b..79e0656 100644 > --- a/client/tests/kvm/subtests.cfg.sample > +++ b/client/tests/kvm/subtests.cfg.sample > @@ -848,6 +848,13 @@ variants: > only Linux > type = iofuzz > > + - cgroup: > + type = cgroup > + # cgroup_tests = "re1[:loops] re2[:loops] ..." > + cgroup_tests = ".*:1" > + vms += " vm2" > + extra_params += " -snapshot" > Do you run the blkio test with snapshot? Sometimes we need to group different real guests, not snapshots. > + > - virtio_console: install setup image_copy unattended_install.cdrom > only Linux > vms = '' > diff --git a/client/tests/kvm/tests/cgroup.py b/client/tests/kvm/tests/cgroup.py > new file mode 100644 > index 0000000..4d0ec43 > --- /dev/null > +++ b/client/tests/kvm/tests/cgroup.py > @@ -0,0 +1,316 @@ > +""" > +cgroup autotest test (on KVM guest) > +@author: Lukas Doktor > +@copyright: 2011 Red Hat, Inc. > +""" > +import logging, re, sys, tempfile, time, traceback > +from autotest_lib.client.common_lib import error > +from autotest_lib.client.bin import utils > +from autotest_lib.client.tests.cgroup.cgroup_common import Cgroup, CgroupModules > + > +def run_cgroup(test, params, env): > + """ > + Tests the cgroup functions on KVM guests. 
> + * Uses variable tests (marked by TODO comment) to map the subtests > + """ > + vms = None > + tests = None > + > + # Tests > + class _TestBlkioBandwidth: > + """ > + BlkioBandwidth dummy test > + * Use it as a base class to an actual test! > + * self.dd_cmd and attr '_set_properties' have to be implemented > + * It prepares 2 vms and run self.dd_cmd to simultaniously stress the > + machines. After 1 minute it kills the dd and gather the throughput > + informations. > + """ > + def __init__(self, vms, modules): > + """ > + Initialization > + @param vms: list of vms > + @param modules: initialized cgroup module class > + """ > + self.vms = vms # Virt machines > + self.modules = modules # cgroup module handler > + self.blkio = Cgroup('blkio', '') # cgroup blkio handler > + self.files = [] # Temporary files (files of virt disks) > + self.devices = [] # Temporary virt devices (PCI drive 1 per vm) > + self.dd_cmd = None # DD command used to test the throughput > + > + def cleanup(self): > + """ > + Cleanup > + """ > + err = "" > + try: > + for i in range (2): > + vms[i].monitor.cmd("pci_del %s" % self.devices[i]) > + self.files[i].close() > + except Exception, inst: > + err += "\nCan't remove PCI drive: %s" % inst > + try: > + del(self.blkio) > + except Exception, inst: > + err += "\nCan't remove Cgroup: %s" % inst > + > + if err: > + logging.error("Some parts of cleanup failed:%s", err) > + raise error.TestError("Some parts of cleanup failed:%s" % err) > + > + def init(self): > + """ > + Initialization > + * assigns vm1 and vm2 into cgroups and sets the properties > + * creates a new virtio device and adds it into vms > + """ > + if test.tagged_testname.find('virtio_blk') == -1: > + logging.warn("You are executing non-virtio_blk test but this " > + "particular subtest uses manually added " > + "'virtio_blk' device.") > Hmm... we can also run the blkio test with IDE. 
> + if not self.dd_cmd: > + raise error.TestError("Corrupt class, aren't you trying to run " > + "parent _TestBlkioBandwidth() function?") > + if len(self.vms)< 2: > + raise error.TestError("Test needs at least 2 vms.") > + > + # cgroups > + pwd = [] > + blkio = self.blkio > + if blkio.initialize(self.modules): > + raise error.TestError("Could not initialize blkio Cgroup") > + for i in range(2): > + pwd.append(blkio.mk_cgroup()) > + if pwd[i] == None: > + raise error.TestError("Can't create cgroup") > + if blkio.set_cgroup(self.vms[i].get_shell_pid(), pwd[i]): > + raise error.TestError("Could not set cgroup") > + # Move all existing threads into cgroup > + for tmp in utils.system_output("ps -L --ppid=%d -o lwp" > + % self.vms[i].get_shell_pid()).split('\n')[1:]: > + if blkio.set_cgroup(int(tmp), pwd[i]): > + raise error.TestError("Could not set cgroup") > + if self.blkio.set_property("blkio.weight", 100, pwd[0]): > It would be better to make the bandwidth configurable, and there may be more than 2 groups. > + raise error.TestError("Could not set blkio.weight") > + if self.blkio.set_property("blkio.weight", 1000, pwd[1]): > + raise error.TestError("Could not set blkio.weight") > + > + # Add dumm drives > + for i in range(2): > > + self.files.append(tempfile.NamedTemporaryFile( > + prefix="cgroup-disk-", > + suffix=".iso")) > + utils.system("dd if=/dev/zero of=%s bs=1M count=10&>/dev/null" > + % (self.files[i].name)) > + out = vms[i].monitor.cmd("pci_add auto storage file=%s," > + "if=virtio,snapshot=off,cache=off" > + % (self.files[i].name)) > + out = re.search(r'OK domain (\d+), bus (\d+), slot (\d+), ' > + 'function \d+', out).groups() > + self.devices.append("%s:%s:%s" % out) > + > + > + def run(self): > + """ > + Actual test: > + * executes self.dd_cmd simultanously on both vms. 
> + """ > + sessions = [] > + out = [] > + sessions.append(vms[0].wait_for_login(timeout=30)) > + sessions.append(vms[1].wait_for_login(timeout=30)) > + sessions.append(vms[0].wait_for_login(timeout=30)) > + sessions.append(vms[1].wait_for_login(timeout=30)) > + sessions[0].sendline(self.dd_cmd) > + sessions[1].sendline(self.dd_cmd) > + time.sleep(60) > + > + cmd = "rm -f /tmp/cgroup_lock; killall -9 dd" > + sessions[2].sendline(cmd) > + sessions[3].sendline(cmd) > + re_dd = (r'(\d+) bytes \(\d+\.*\d* \w*\) copied, (\d+\.*\d*) s, ' > + '\d+\.*\d* \w./s') > + out = [] > + for i in range(2): > + out.append(sessions[i].read_up_to_prompt()) > + out[i] = [int(_[0])/float(_[1]) > + for _ in re.findall(re_dd, out[i])[1:-1]] > + logging.debug("dd(%d) output: %s", i, out[i]) > + out[i] = [min(out[i]), sum(out[i])/len(out[i]), max(out[i]), > + len(out[i])] > + > + for session in sessions: > + session.close() > + > + logging.debug("dd values (min,avg,max,ddloops):\nout1: %s\nout2: %s" > + ,out[0], out[1]) > + > + out1 = out[0][1] > + out2 = out[1][1] > + # In theory out1 should be 10times smaller, than out2. 
> + if out1*3> out2: > + raise error.TestFail("dd values: %s:%s (1:%f), limit 1:2.5" > + ", theoretical: 1:10" > + % (out1, out2, out2/out1)) > + else: > + logging.info("dd values: %s:%s (1:%s)", out1, out2, out2/out1) > + > + > + > + class TestBlkioBandwidthWeigthRead(_TestBlkioBandwidth): > + """ > + Tests the blkio.weight capability using simultanious read on 2 vms > + """ > + def __init__(self, vms, modules): > + """ > + Initialization > + @param vms: list of vms > + @param modules: initialized cgroup module class > + """ > + _TestBlkioBandwidth.__init__(self, vms, modules) > + self.dd_cmd = ("export FILE=$(ls /dev/vd* | tail -n 1); touch " > + "/tmp/cgroup_lock ; while [ -e /tmp/cgroup_lock ];" > + "do dd if=$FILE of=/dev/null iflag=direct bs=100K;" > + "done") > + > + > + class TestBlkioBandwidthWeigthWrite(_TestBlkioBandwidth): > + """ > + Tests the blkio.weight capability using simultanious write on 2 vms > + """ > + def __init__(self, vms, modules): > + """ > + Initialization > + @param vms: list of vms > + @param modules: initialized cgroup module class > + """ > + _TestBlkioBandwidth.__init__(self, vms, modules) > + self.dd_cmd = ('export FILE=$(ls /dev/vd* | tail -n 1); touch ' > + '/tmp/cgroup_lock ; while [ -e /tmp/cgroup_lock ];' > + 'do dd if=/dev/zero of=$FILE oflag=direct bs=100K;' > + 'done') > + > + > + def _check_vms(vms): > + """ > + Checks the vitality of VM > + @param vms: list of vm's > + """ > + for i in range(len(vms)): > + vms[i].verify_alive() > + _ = vms[i].wait_for_login(timeout=60) > + out = _.cmd_output("dmesg -c") > + _.close() > + del(_) > + if out.find("BUG") != -1: > + logging.error("BUG occured in dmesg:\n%s", out) > + logging.warn("recreate VM(%s)", i) > + # The vm have to be recreate to reset the qemu PCI state > + vms[i].create() > + > + > + # Setup > + # TODO: Add all new tests here > + tests = {"blkio_bandwidth_weigth_read" : TestBlkioBandwidthWeigthRead, > + "blkio_bandwidth_weigth_write" : TestBlkioBandwidthWeigthWrite, 
> + } > + modules = CgroupModules() > + if (modules.init(['cpuset', 'cpu', 'cpuacct', 'memory', 'devices', > + 'freezer', 'net_cls', 'blkio'])<= 0): > + raise error.TestFail('Can\'t mount any cgroup modules') > + # Add all vms > + vms = [] > + for vm in params.get("vms", "main_vm").split(): > + vm = env.get_vm(vm) > + vm.verify_alive() > + timeout = int(params.get("login_timeout", 360)) > + _ = vm.wait_for_login(timeout=timeout) > + _.close() > + del(_) > + vms.append(vm) > + > + > + # Execute tests > + results = "" > + # cgroup_tests = "re1[:loops] re2[:loops] ... ... ..." > + for j in params.get("cgroup_tests").split(): > + try: > + loops = int(j[j.rfind(':')+1:]) > + j = j[:j.rfind(':')] > + except: > + loops = 1 > + for _loop in range(loops): > + for i in [_ for _ in tests.keys() if re.match(j, _)]: > + logging.info("%s: Entering the test", i) > + try: > + _check_vms(vms) > + tst = tests[i](vms, modules) > + tst.init() > + tst.run() > + except error.TestFail, inst: > + logging.error("%s: Leaving, test FAILED (TestFail): %s", > + i, inst) > + results += "\n * %s: Test FAILED (TestFail): %s" % (i, inst) > + try: > + tst.cleanup() > + except Exception, inst: > + tmps = "" > + for tmp in traceback.format_exception( > + sys.exc_info()[0], > + sys.exc_info()[1], > + sys.exc_info()[2]): > + tmps += "%s cleanup: %s" % (i, tmp) > + logging.info("%s: cleanup also failed\n%s", i, tmps) > + except error.TestError, inst: > + tmps = "" > + for tmp in traceback.format_exception( > + sys.exc_info()[0], > + sys.exc_info()[1], > + sys.exc_info()[2]): > + tmps += "%s: %s" % (i, tmp) > + logging.error("%s: Leaving, test FAILED (TestError): %s", > + i, tmps) > + results += "\n * %s: Test FAILED (TestError): %s"% (i, inst) > + try: > + tst.cleanup() > + except Exception, inst: > + logging.warn("%s: cleanup also failed: %s\n", i, inst) > + except Exception, inst: > + tmps = "" > + for tmp in traceback.format_exception( > + sys.exc_info()[0], > + sys.exc_info()[1], > + 
sys.exc_info()[2]): > + tmps += "%s: %s" % (i, tmp) > + logging.error("%s: Leaving, test FAILED (Exception): %s", > + i, tmps) > + results += "\n * %s: Test FAILED (Exception): %s"% (i, inst) > + try: > + tst.cleanup() > + except Exception, inst: > + logging.warn("%s: cleanup also failed: %s\n", i, inst) > + else: > + try: > + tst.cleanup() > + except Exception, inst: > + tmps = "" > + for tmp in traceback.format_exception( > + sys.exc_info()[0], > + sys.exc_info()[1], > + sys.exc_info()[2]): > + tmps += "%s cleanup: %s" % (i, tmp) > + logging.info("%s: Leaving, test passed but cleanup " > + "FAILED\n%s", i, tmps) > + results += ("\n * %s: Test passed but cleanup FAILED" > + % (i)) > + else: > + logging.info("%s: Leaving, test PASSED", i) > + results += "\n * %s: Test PASSED" % (i) > + > + logging.info("SUM: All tests finished (%d PASS / %d FAIL = %d TOTAL)%s", > + results.count("PASSED"), results.count("FAILED"), > + (results.count("PASSED")+results.count("FAILED")), results) > + if results.count("FAILED"): > + raise error.TestFail("Some subtests failed") > + >