From: Kevin Wolf <kwolf@suse.de>
To: Laurent Vivier <Laurent.Vivier@bull.net>
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH] Align file accesses with cache=off (O_DIRECT)
Date: Wed, 30 Apr 2008 14:08:20 +0200 [thread overview]
Message-ID: <48186134.7070303@suse.de> (raw)
In-Reply-To: <1209549574.4312.27.camel@frecb07144>
[-- Attachment #1: Type: text/plain, Size: 210 bytes --]
Laurent Vivier schrieb:
> just a comment on the patch: perhaps you can call your field
> "open_flags" instead of "flags", and perhaps you can merge your field
> with "fd_open_flags" ?
Here you are. ;-)
Kevin
[-- Attachment #2: align-odirect-accesses.patch --]
[-- Type: text/x-patch, Size: 6358 bytes --]
Index: block-raw-posix.c
===================================================================
--- block-raw-posix.c.orig
+++ block-raw-posix.c
@@ -77,10 +77,10 @@
typedef struct BDRVRawState {
int fd;
int type;
+ int open_flags;
unsigned int lseek_err_cnt;
#if defined(__linux__)
/* linux floppy specific */
- int fd_open_flags;
int64_t fd_open_time;
int64_t fd_error_time;
int fd_got_error;
@@ -111,6 +111,7 @@ static int raw_open(BlockDriverState *bs
open_flags |= O_DIRECT;
#endif
+ s->open_flags = open_flags;
s->type = FTYPE_FILE;
fd = open(filename, open_flags, 0644);
@@ -141,7 +142,14 @@ static int raw_open(BlockDriverState *bs
#endif
*/
-static int raw_pread(BlockDriverState *bs, int64_t offset,
+/*
+ * offset and count are in bytes, but must be multiples of 512 for files
+ * opened with O_DIRECT. buf must be aligned to 512 bytes then.
+ *
+ * This function may be called without alignment if the caller ensures
+ * that O_DIRECT is not in effect.
+ */
+static int raw_pread_aligned(BlockDriverState *bs, int64_t offset,
uint8_t *buf, int count)
{
BDRVRawState *s = bs->opaque;
@@ -194,7 +202,14 @@ label__raw_read__success:
return ret;
}
-static int raw_pwrite(BlockDriverState *bs, int64_t offset,
+/*
+ * offset and count are in bytes, but must be multiples of 512 for files
+ * opened with O_DIRECT. buf must be aligned to 512 bytes then.
+ *
+ * This function may be called without alignment if the caller ensures
+ * that O_DIRECT is not in effect.
+ */
+static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset,
const uint8_t *buf, int count)
{
BDRVRawState *s = bs->opaque;
@@ -230,6 +245,67 @@ label__raw_write__success:
return ret;
}
+
+#ifdef O_DIRECT
+/*
+ * offset and count are in bytes and possibly not aligned. If the file was
+ * opened with O_DIRECT and offset, count or buf is not a multiple of 512,
+ * O_DIRECT is cleared via fcntl() around the raw_pread_aligned call.
+ */
+static int raw_pread(BlockDriverState *bs, int64_t offset,
+ uint8_t *buf, int count)
+{
+ BDRVRawState *s = bs->opaque;
+
+ if (unlikely((s->open_flags & O_DIRECT) &&
+ (offset % 512 || count % 512 || (uintptr_t) buf % 512))) {
+
+ int ret;
+
+ // Unaligned: clear O_DIRECT for this call only (fcntl results unchecked)
+ fcntl(s->fd, F_SETFL, s->open_flags & ~O_DIRECT);
+ ret = raw_pread_aligned(bs, offset, buf, count);
+ fcntl(s->fd, F_SETFL, s->open_flags);
+
+ return ret;
+
+ } else {
+ return raw_pread_aligned(bs, offset, buf, count);
+ }
+}
+
+/*
+ * offset and count are in bytes and possibly not aligned. If the file was
+ * opened with O_DIRECT and offset, count or buf is not a multiple of 512,
+ * O_DIRECT is cleared via fcntl() around the raw_pwrite_aligned call.
+ */
+static int raw_pwrite(BlockDriverState *bs, int64_t offset,
+ const uint8_t *buf, int count)
+{
+ BDRVRawState *s = bs->opaque;
+
+ if (unlikely((s->open_flags & O_DIRECT) &&
+ (offset % 512 || count % 512 || (uintptr_t) buf % 512))) {
+
+ int ret;
+
+ // Unaligned: clear O_DIRECT for this call only (fcntl results unchecked)
+ fcntl(s->fd, F_SETFL, s->open_flags & ~O_DIRECT);
+ ret = raw_pwrite_aligned(bs, offset, buf, count);
+ fcntl(s->fd, F_SETFL, s->open_flags);
+
+ return ret;
+ } else {
+ return raw_pwrite_aligned(bs, offset, buf, count);
+ }
+}
+
+#else
+#define raw_pread raw_pread_aligned
+#define raw_pwrite raw_pwrite_aligned
+#endif
+
+
/***********************************************************/
/* Unix AIO using POSIX AIO */
@@ -402,10 +478,26 @@ static BlockDriverAIOCB *raw_aio_read(Bl
BlockDriverCompletionFunc *cb, void *opaque)
{
RawAIOCB *acb;
+ BDRVRawState *s = bs->opaque;
+
+ /*
+ * If O_DIRECT is used and the buffer is not aligned fall back
+ * to synchronous IO.
+ */
+ if (unlikely((s->open_flags & O_DIRECT) && ((uintptr_t) buf % 512))) {
+ int ret;
+
+ acb = qemu_aio_get(bs, cb, opaque);
+ ret = raw_pread(bs, 512 * sector_num, buf, 512 * nb_sectors);
+ acb->common.cb(acb->common.opaque, ret);
+ qemu_aio_release(acb);
+ return &acb->common;
+ }
acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
if (!acb)
return NULL;
+
if (aio_read(&acb->aiocb) < 0) {
qemu_aio_release(acb);
return NULL;
@@ -418,6 +510,21 @@ static BlockDriverAIOCB *raw_aio_write(B
BlockDriverCompletionFunc *cb, void *opaque)
{
RawAIOCB *acb;
+ BDRVRawState *s = bs->opaque;
+
+ /*
+ * If O_DIRECT is used and the buffer is not aligned fall back
+ * to synchronous IO.
+ */
+ if (unlikely((s->open_flags & O_DIRECT) && ((uintptr_t) buf % 512))) {
+ int ret;
+
+ acb = qemu_aio_get(bs, cb, opaque);
+ ret = raw_pwrite(bs, 512 * sector_num, buf, 512 * nb_sectors);
+ acb->common.cb(acb->common.opaque, ret);
+ qemu_aio_release(acb);
+ return &acb->common;
+ }
acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
if (!acb)
@@ -679,7 +786,7 @@ static int hdev_open(BlockDriverState *b
s->type = FTYPE_CD;
} else if (strstart(filename, "/dev/fd", NULL)) {
s->type = FTYPE_FD;
- s->fd_open_flags = open_flags;
+ s->open_flags = open_flags;
/* open will not fail even if no floppy is inserted */
open_flags |= O_NONBLOCK;
} else if (strstart(filename, "/dev/sg", NULL)) {
@@ -734,7 +841,7 @@ static int fd_open(BlockDriverState *bs)
#endif
return -EIO;
}
- s->fd = open(bs->filename, s->fd_open_flags);
+ s->fd = open(bs->filename, s->open_flags);
if (s->fd < 0) {
s->fd_error_time = qemu_get_clock(rt_clock);
s->fd_got_error = 1;
@@ -831,7 +938,7 @@ static int raw_eject(BlockDriverState *b
close(s->fd);
s->fd = -1;
}
- fd = open(bs->filename, s->fd_open_flags | O_NONBLOCK);
+ fd = open(bs->filename, s->open_flags | O_NONBLOCK);
if (fd >= 0) {
if (ioctl(fd, FDEJECT, 0) < 0)
perror("FDEJECT");
next prev parent reply other threads:[~2008-04-30 12:13 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-04-17 13:31 [Qemu-devel] [PATCH] Align file accesses with cache=off (O_DIRECT) Kevin Wolf
2008-04-28 15:34 ` Kevin Wolf
2008-04-29 9:01 ` Laurent Vivier
2008-04-29 14:49 ` Kevin Wolf
2008-04-29 15:48 ` Laurent Vivier
2008-04-29 16:21 ` Kevin Wolf
2008-04-29 16:48 ` Laurent Vivier
2008-04-30 9:21 ` Kevin Wolf
2008-04-30 9:59 ` Laurent Vivier
2008-04-30 12:08 ` Kevin Wolf [this message]
2008-04-30 14:30 ` Blue Swirl
2008-04-30 21:05 ` Kevin Wolf
2008-05-01 14:35 ` Blue Swirl
2008-05-01 17:55 ` Kevin Wolf
2008-05-06 8:44 ` Kevin Wolf
2008-05-06 9:02 ` Laurent Vivier
2008-05-06 16:42 ` Blue Swirl
2008-05-06 16:56 ` Kevin Wolf
2008-05-06 17:23 ` Blue Swirl
2008-04-30 0:05 ` Jamie Lokier
2008-04-30 0:02 ` Jamie Lokier
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=48186134.7070303@suse.de \
--to=kwolf@suse.de \
--cc=Laurent.Vivier@bull.net \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).