From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1JmUIg-0005vj-VA for qemu-devel@nongnu.org; Thu, 17 Apr 2008 09:37:19 -0400 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1JmUIe-0005ur-Tu for qemu-devel@nongnu.org; Thu, 17 Apr 2008 09:37:18 -0400 Received: from [199.232.76.173] (helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1JmUIe-0005uW-I2 for qemu-devel@nongnu.org; Thu, 17 Apr 2008 09:37:16 -0400 Received: from ns.suse.de ([195.135.220.2] helo=mx1.suse.de) by monty-python.gnu.org with esmtps (TLS-1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.60) (envelope-from ) id 1JmUId-0005ic-5r for qemu-devel@nongnu.org; Thu, 17 Apr 2008 09:37:16 -0400 Received: from Relay2.suse.de (mail2.suse.de [195.135.221.8]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.suse.de (Postfix) with ESMTP id 8EFAF404C6 for ; Thu, 17 Apr 2008 15:37:11 +0200 (CEST) Message-ID: <4807514B.9040607@suse.de> Date: Thu, 17 Apr 2008 15:31:55 +0200 From: Kevin Wolf MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="------------000006030403000809030804" Subject: [Qemu-devel] [PATCH] Align file accesses with cache=off (O_DIRECT) Reply-To: qemu-devel@nongnu.org List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org This is a multi-part message in MIME format. --------------000006030403000809030804 Content-Type: text/plain; charset=ISO-8859-15 Content-Transfer-Encoding: 7bit In December a patch was applied which introduced the cache=off option to -drive. When using this option files are opened with the O_DIRECT flag. This means that all accesses have to be aligned. The patch made a couple of changes in this respect, still in other places they are missing (e.g. you can't use cache=off with qcow(2) files). 
This patch implements wrappers for raw_pread and raw_pwrite which align all file accesses and make qcow(2) work with cache=off. This method might not be the most performant one (compared to fixing qcow, qcow2 and everything else that might be using unaligned accesses), but unaligned accesses don't happen that frequently and with this patch really all image accesses should be covered. Signed-off-by: Kevin Wolf --------------000006030403000809030804 Content-Type: text/x-patch; name="align-odirect-accesses.patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="align-odirect-accesses.patch" Index: block-raw-posix.c =================================================================== --- block-raw-posix.c (Revision 4215) +++ block-raw-posix.c (Arbeitskopie) @@ -77,6 +77,7 @@ typedef struct BDRVRawState { int fd; int type; + int flags; unsigned int lseek_err_cnt; #if defined(__linux__) /* linux floppy specific */ @@ -95,6 +96,7 @@ BDRVRawState *s = bs->opaque; int fd, open_flags, ret; + s->flags = flags; s->lseek_err_cnt = 0; open_flags = O_BINARY; @@ -141,7 +143,11 @@ #endif */ -static int raw_pread(BlockDriverState *bs, int64_t offset, +/* + * offset and count are in bytes, but must be multiples of 512 (for files + * opened with O_DIRECT). buf must be aligned to 512 bytes. + */ +static int raw_pread_aligned(BlockDriverState *bs, int64_t offset, uint8_t *buf, int count) { BDRVRawState *s = bs->opaque; @@ -194,7 +200,11 @@ return ret; } -static int raw_pwrite(BlockDriverState *bs, int64_t offset, +/* + * offset and count are in bytes, but must be multiples of 512 (for files + * opened with O_DIRECT). buf must be aligned to 512 bytes. + */ +static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset, const uint8_t *buf, int count) { BDRVRawState *s = bs->opaque; @@ -230,6 +240,92 @@ return ret; } + +#ifdef O_DIRECT +/* + * offset and count are in bytes and possibly not aligned. 
For files opened
+ * with O_DIRECT, necessary alignments are ensured before calling
+ * raw_pread_aligned to do the actual read.
+ *
+ * Returns the number of requested bytes copied into buf (at most count).
+ * A non-positive result from raw_pread_aligned is passed through unchanged;
+ * -1 is returned if the bounce buffer cannot be allocated.
+ */
+static int raw_pread(BlockDriverState *bs, int64_t offset,
+    uint8_t *buf, int count)
+{
+    BDRVRawState *s = bs->opaque;
+
+    /*
+     * raw_pread_aligned requires offset, count and buf all to be aligned,
+     * so an unaligned count has to take the bounce-buffer path as well.
+     */
+    if (unlikely((s->flags & BDRV_O_DIRECT) &&
+            (offset % 512 != 0 || count % 512 != 0 ||
+             (uintptr_t) buf % 512 != 0))) {
+
+        uint8_t *aligned_buf;
+        int64_t aligned_offs = offset & ~511;
+        /* Number of padding bytes in front of the requested data. */
+        int head = offset - aligned_offs;
+        int aligned_count = (count + head + 511) & ~511;
+        int ret;
+
+        /* aligned_count is already a byte count; do not scale it again. */
+        aligned_buf = qemu_memalign(512, aligned_count);
+        if (!aligned_buf) {
+            /* NOTE(review): assumes qemu_memalign reports failure as NULL. */
+            return -1;
+        }
+
+        ret = raw_pread_aligned(bs, aligned_offs, aligned_buf, aligned_count);
+
+        if (ret > 0) {
+            /*
+             * ret counts bytes from aligned_offs onwards. Only the part
+             * behind the leading padding belongs to the caller, and no more
+             * than count bytes of it.
+             */
+            ret -= head;
+            if (ret < 0)
+                ret = 0;
+            if (ret > count)
+                ret = count;
+            if (ret > 0)
+                memcpy(buf, aligned_buf + head, ret);
+        }
+
+        qemu_free(aligned_buf);
+        return ret;
+
+    } else {
+        return raw_pread_aligned(bs, offset, buf, count);
+    }
+}
+
+/*
+ * offset and count are in bytes and possibly not aligned. For files opened
+ * with O_DIRECT, necessary alignments are ensured before calling
+ * raw_pwrite_aligned to do the actual write.
+ */
+/*
+ * Unaligned requests are served by read-modify-write through a 512-byte
+ * aligned bounce buffer: every block that the request covers only partly is
+ * loaded first so that the bytes around the new data are written back
+ * unchanged.
+ *
+ * Returns the number of bytes of buf that were written (at most count).
+ * A negative result from raw_pread_aligned or raw_pwrite_aligned is passed
+ * through unchanged; -1 is returned if the bounce buffer cannot be allocated.
+ */
+static int raw_pwrite(BlockDriverState *bs, int64_t offset,
+    const uint8_t *buf, int count)
+{
+    BDRVRawState *s = bs->opaque;
+
+    /*
+     * raw_pwrite_aligned requires offset, count and buf all to be aligned,
+     * so an unaligned count has to take the bounce-buffer path as well.
+     */
+    if (unlikely((s->flags & BDRV_O_DIRECT) &&
+            (offset % 512 != 0 || count % 512 != 0 ||
+             (uintptr_t) buf % 512 != 0))) {
+
+        uint8_t *aligned_buf;
+        int64_t aligned_offs = offset & ~511;
+        /* Number of padding bytes in front of the caller's data. */
+        int head = offset - aligned_offs;
+        int aligned_count = (count + head + 511) & ~511;
+        /* Non-zero when the request ends in the middle of a block. */
+        int tail_partial = (offset + count) % 512 != 0;
+        int ret = 0;
+
+        /* aligned_count is already a byte count; do not scale it again. */
+        aligned_buf = qemu_memalign(512, aligned_count);
+        if (!aligned_buf) {
+            /* NOTE(review): assumes qemu_memalign reports failure as NULL. */
+            return -1;
+        }
+
+        /*
+         * Preload the first block if the request starts inside it, or if the
+         * whole request lives in this single block and ends before its end.
+         * The block is zeroed first so that a short read (e.g. at end of
+         * file) never leaves uninitialised memory to be written out.
+         */
+        if (head != 0 || (tail_partial && aligned_count == 512)) {
+            memset(aligned_buf, 0, 512);
+            ret = raw_pread_aligned(bs, aligned_offs, aligned_buf, 512);
+        }
+
+        /* Preload the last block if it is a different, partly covered one. */
+        if (ret >= 0 && tail_partial && aligned_count > 512) {
+            memset(aligned_buf + aligned_count - 512, 0, 512);
+            ret = raw_pread_aligned(bs,
+                    aligned_offs + aligned_count - 512,
+                    aligned_buf + aligned_count - 512,
+                    512);
+        }
+
+        /* Do not write anything if the surrounding data could not be read. */
+        if (ret >= 0) {
+            memcpy(aligned_buf + head, buf, count);
+            ret = raw_pwrite_aligned(bs, aligned_offs, aligned_buf,
+                                     aligned_count);
+
+            if (ret > 0) {
+                /*
+                 * ret counts bytes from aligned_offs onwards. Report only
+                 * the caller's bytes: drop the leading padding and cap the
+                 * result at count.
+                 */
+                ret -= head;
+                if (ret < 0)
+                    ret = 0;
+                if (ret > count)
+                    ret = count;
+            }
+        }
+
+        qemu_free(aligned_buf);
+        return ret;
+    } else {
+        return raw_pwrite_aligned(bs, offset, buf, count);
+    }
+}
+
+#else
+#define raw_pread raw_pread_aligned
+#define raw_pwrite raw_pwrite_aligned
+#endif
+
+
 /***********************************************************/
 /* Unix AIO using POSIX AIO */
 
Index: Makefile
===================================================================
--- Makefile (Revision 4215)
+++ Makefile (Arbeitskopie)
@@ -34,7 +34,7 @@
 #######################################################################
 # BLOCK_OBJS is code used by both qemu system emulation and qemu-img
 
-BLOCK_OBJS=cutils.o
+BLOCK_OBJS=cutils.o osdep.o
 BLOCK_OBJS+=block-cow.o block-qcow.o aes.o block-vmdk.o block-cloop.o
 BLOCK_OBJS+=block-dmg.o block-bochs.o block-vpc.o block-vvfat.o
 BLOCK_OBJS+=block-qcow2.o block-parallels.o
--------------000006030403000809030804--