From: Kevin Wolf <kwolf@suse.de>
To: qemu-devel@nongnu.org
Subject: [Qemu-devel] [PATCH] Align file accesses with cache=off (O_DIRECT)
Date: Thu, 17 Apr 2008 15:31:55 +0200 [thread overview]
Message-ID: <4807514B.9040607@suse.de> (raw)
[-- Attachment #1: Type: text/plain, Size: 767 bytes --]
In December a patch was applied which introduced the cache=off option to
-drive. When using this option files are opened with the O_DIRECT flag.
This means that all accesses have to be aligned. The patch made a couple
of changes in this respect, but in other places they are still missing
(e.g. you can't use cache=off with qcow(2) files).
This patch implements wrappers for raw_pread and raw_pwrite which align
all file accesses and make qcow(2) work with cache=off. This method
might not be the most performant one (compared to fixing qcow, qcow2 and
everything else that might be using unaligned accesses), but unaligned
accesses don't happen that frequently and with this patch really all
image accesses should be covered.
Signed-off-by: Kevin Wolf <kwolf@suse.de>
[-- Attachment #2: align-odirect-accesses.patch --]
[-- Type: text/x-patch, Size: 4674 bytes --]
Index: block-raw-posix.c
===================================================================
--- block-raw-posix.c (Revision 4215)
+++ block-raw-posix.c (Arbeitskopie)
@@ -77,6 +77,7 @@
typedef struct BDRVRawState {
int fd;
int type;
+ int flags;
unsigned int lseek_err_cnt;
#if defined(__linux__)
/* linux floppy specific */
@@ -95,6 +96,7 @@
BDRVRawState *s = bs->opaque;
int fd, open_flags, ret;
+ s->flags = flags;
s->lseek_err_cnt = 0;
open_flags = O_BINARY;
@@ -141,7 +143,11 @@
#endif
*/
-static int raw_pread(BlockDriverState *bs, int64_t offset,
+/*
+ * offset and count are in bytes, but must be multiples of 512 (for files
+ * opened with O_DIRECT). buf must be aligned to 512 bytes.
+ */
+static int raw_pread_aligned(BlockDriverState *bs, int64_t offset,
uint8_t *buf, int count)
{
BDRVRawState *s = bs->opaque;
@@ -194,7 +200,11 @@
return ret;
}
-static int raw_pwrite(BlockDriverState *bs, int64_t offset,
+/*
+ * offset and count are in bytes, but must be multiples of 512 (for files
+ * opened with O_DIRECT). buf must be aligned to 512 bytes.
+ */
+static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset,
const uint8_t *buf, int count)
{
BDRVRawState *s = bs->opaque;
@@ -230,6 +240,156 @@
return ret;
}
+
+#ifdef O_DIRECT
+/*
+ * Read count bytes starting at byte position offset into buf. None of
+ * offset, count and buf has to be aligned.
+ *
+ * For files opened with O_DIRECT, a request whose offset, length or buffer
+ * address is not a multiple of 512 goes through a temporary 512-byte-aligned
+ * bounce buffer that covers every sector the request touches, and
+ * raw_pread_aligned does the actual read. Everything else is handed to
+ * raw_pread_aligned unchanged.
+ *
+ * Returns the number of bytes copied into buf (at most count). A result of
+ * raw_pread_aligned that is <= 0 is returned as is; -1 is returned if the
+ * bounce buffer cannot be allocated.
+ */
+static int raw_pread(BlockDriverState *bs, int64_t offset,
+                     uint8_t *buf, int count)
+{
+    BDRVRawState *s = bs->opaque;
+
+    /* count > 0 keeps empty requests away from a zero-sized allocation */
+    if (unlikely((s->flags & BDRV_O_DIRECT) && count > 0 &&
+                 (offset % 512 != 0 || count % 512 != 0 ||
+                  (uintptr_t) buf % 512 != 0))) {
+
+        uint8_t *aligned_buf;
+        int64_t aligned_offs = offset & ~511;
+        /* number of padding bytes in front of the requested data */
+        int head = offset - aligned_offs;
+        int aligned_count = (count + head + 511) & ~511;
+        int ret;
+
+        /* aligned_count is already a byte count, do not scale it by 512 */
+        aligned_buf = qemu_memalign(512, aligned_count);
+        if (aligned_buf == NULL)
+            return -1;
+
+        ret = raw_pread_aligned(bs, aligned_offs, aligned_buf, aligned_count);
+
+        if (ret > 0) {
+            /*
+             * ret counts from aligned_offs. Drop the padding so that a short
+             * read never copies bytes that were not actually read.
+             */
+            ret -= head;
+            if (ret < 0)
+                ret = 0;
+            if (ret > count)
+                ret = count;
+            memcpy(buf, aligned_buf + head, ret);
+        }
+
+        qemu_free(aligned_buf);
+        return ret;
+
+    } else {
+        return raw_pread_aligned(bs, offset, buf, count);
+    }
+}
+
+/*
+ * Write count bytes from buf at byte position offset. None of offset, count
+ * and buf has to be aligned.
+ *
+ * For files opened with O_DIRECT, a request whose offset, length or buffer
+ * address is not a multiple of 512 is turned into a read-modify-write of all
+ * sectors it touches: partially covered sectors are read with
+ * raw_pread_aligned into a 512-byte-aligned bounce buffer, the caller's data
+ * is copied over them and raw_pwrite_aligned writes the result. Everything
+ * else is handed to raw_pwrite_aligned unchanged.
+ *
+ * Returns the number of bytes of buf that were written (at most count). A
+ * result of raw_pwrite_aligned that is <= 0, or a negative result of one of
+ * the preparatory reads, is returned as is; -1 is returned if the bounce
+ * buffer cannot be allocated.
+ */
+static int raw_pwrite(BlockDriverState *bs, int64_t offset,
+                      const uint8_t *buf, int count)
+{
+    BDRVRawState *s = bs->opaque;
+
+    /* count > 0 keeps empty requests away from a zero-sized allocation */
+    if (unlikely((s->flags & BDRV_O_DIRECT) && count > 0 &&
+                 (offset % 512 != 0 || count % 512 != 0 ||
+                  (uintptr_t) buf % 512 != 0))) {
+
+        uint8_t *aligned_buf;
+        int64_t aligned_offs = offset & ~511;
+        /* number of padding bytes in front of the caller's data */
+        int head = offset - aligned_offs;
+        int aligned_count = (count + head + 511) & ~511;
+        /* non-zero if the request ends in the middle of a sector */
+        int partial_tail = (offset + count) % 512 != 0;
+        int ret;
+
+        /* aligned_count is already a byte count, do not scale it by 512 */
+        aligned_buf = qemu_memalign(512, aligned_count);
+        if (aligned_buf == NULL)
+            return -1;
+
+        /* Padding a short read does not fill must not be heap garbage */
+        memset(aligned_buf, 0, aligned_count);
+
+        /*
+         * Read in the first block if needed. When the whole request lies in
+         * one sector, this block is also the last one and has to be read
+         * even if the request starts on a sector boundary.
+         */
+        if (head != 0 || (partial_tail && aligned_count == 512)) {
+            ret = raw_pread_aligned(bs, aligned_offs, aligned_buf, 512);
+            if (ret < 0)
+                goto out;
+        }
+
+        /* Read in the last block if needed */
+        if (partial_tail && aligned_count > 512) {
+            ret = raw_pread_aligned(bs,
+                                    aligned_offs + aligned_count - 512,
+                                    aligned_buf + aligned_count - 512,
+                                    512);
+            if (ret < 0)
+                goto out;
+        }
+
+        memcpy(aligned_buf + head, buf, count);
+        ret = raw_pwrite_aligned(bs, aligned_offs, aligned_buf, aligned_count);
+
+        if (ret > 0) {
+            /* ret counts from aligned_offs; report the caller's bytes only */
+            ret -= head;
+            if (ret < 0)
+                ret = 0;
+            if (ret > count)
+                ret = count;
+        }
+
+out:
+        qemu_free(aligned_buf);
+        return ret;
+    } else {
+        return raw_pwrite_aligned(bs, offset, buf, count);
+    }
+}
+
+#else
+#define raw_pread raw_pread_aligned
+#define raw_pwrite raw_pwrite_aligned
+#endif
+
+
/***********************************************************/
/* Unix AIO using POSIX AIO */
Index: Makefile
===================================================================
--- Makefile (Revision 4215)
+++ Makefile (Arbeitskopie)
@@ -34,7 +34,7 @@
#######################################################################
# BLOCK_OBJS is code used by both qemu system emulation and qemu-img
-BLOCK_OBJS=cutils.o
+BLOCK_OBJS=cutils.o osdep.o
BLOCK_OBJS+=block-cow.o block-qcow.o aes.o block-vmdk.o block-cloop.o
BLOCK_OBJS+=block-dmg.o block-bochs.o block-vpc.o block-vvfat.o
BLOCK_OBJS+=block-qcow2.o block-parallels.o
next reply other threads:[~2008-04-17 13:37 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-04-17 13:31 Kevin Wolf [this message]
2008-04-28 15:34 ` [Qemu-devel] [PATCH] Align file accesses with cache=off (O_DIRECT) Kevin Wolf
2008-04-29 9:01 ` Laurent Vivier
2008-04-29 14:49 ` Kevin Wolf
2008-04-29 15:48 ` Laurent Vivier
2008-04-29 16:21 ` Kevin Wolf
2008-04-29 16:48 ` Laurent Vivier
2008-04-30 9:21 ` Kevin Wolf
2008-04-30 9:59 ` Laurent Vivier
2008-04-30 12:08 ` Kevin Wolf
2008-04-30 14:30 ` Blue Swirl
2008-04-30 21:05 ` Kevin Wolf
2008-05-01 14:35 ` Blue Swirl
2008-05-01 17:55 ` Kevin Wolf
2008-05-06 8:44 ` Kevin Wolf
2008-05-06 9:02 ` Laurent Vivier
2008-05-06 16:42 ` Blue Swirl
2008-05-06 16:56 ` Kevin Wolf
2008-05-06 17:23 ` Blue Swirl
2008-04-30 0:05 ` Jamie Lokier
2008-04-30 0:02 ` Jamie Lokier
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4807514B.9040607@suse.de \
--to=kwolf@suse.de \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).