From: Kevin Wolf <kwolf@suse.de>
To: Laurent Vivier <Laurent.Vivier@bull.net>
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH] Align file accesses with cache=off (O_DIRECT)
Date: Wed, 30 Apr 2008 14:08:20 +0200 [thread overview]
Message-ID: <48186134.7070303@suse.de> (raw)
In-Reply-To: <1209549574.4312.27.camel@frecb07144>
[-- Attachment #1: Type: text/plain, Size: 210 bytes --]
Laurent Vivier schrieb:
> just a comment on the patch: perhaps you can call your field
> "open_flags" instead of "flags", and perhaps you can merge your field
> with "fd_open_flags" ?
Here you are. ;-)
Kevin
[-- Attachment #2: align-odirect-accesses.patch --]
[-- Type: text/x-patch, Size: 6358 bytes --]
Index: block-raw-posix.c
===================================================================
--- block-raw-posix.c.orig
+++ block-raw-posix.c
@@ -77,10 +77,10 @@
typedef struct BDRVRawState {
int fd;
int type;
+ int open_flags;
unsigned int lseek_err_cnt;
#if defined(__linux__)
/* linux floppy specific */
- int fd_open_flags;
int64_t fd_open_time;
int64_t fd_error_time;
int fd_got_error;
@@ -111,6 +111,7 @@ static int raw_open(BlockDriverState *bs
open_flags |= O_DIRECT;
#endif
+ s->open_flags = open_flags;
s->type = FTYPE_FILE;
fd = open(filename, open_flags, 0644);
@@ -141,7 +142,14 @@ static int raw_open(BlockDriverState *bs
#endif
*/
-static int raw_pread(BlockDriverState *bs, int64_t offset,
+/*
+ * offset and count are in bytes, but must be multiples of 512 for files
+ * opened with O_DIRECT. buf must be aligned to 512 bytes then.
+ *
+ * This function may be called without alignment if the caller ensures
+ * that O_DIRECT is not in effect.
+ */
+static int raw_pread_aligned(BlockDriverState *bs, int64_t offset,
uint8_t *buf, int count)
{
BDRVRawState *s = bs->opaque;
@@ -194,7 +202,14 @@ label__raw_read__success:
return ret;
}
-static int raw_pwrite(BlockDriverState *bs, int64_t offset,
+/*
+ * offset and count are in bytes, but must be multiples of 512 for files
+ * opened with O_DIRECT. buf must be aligned to 512 bytes then.
+ *
+ * This function may be called without alignment if the caller ensures
+ * that O_DIRECT is not in effect.
+ */
+static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset,
const uint8_t *buf, int count)
{
BDRVRawState *s = bs->opaque;
@@ -230,6 +245,67 @@ label__raw_write__success:
return ret;
}
+
+#ifdef O_DIRECT
+/*
+ * offset and count are in bytes and need not be aligned. If the file was
+ * opened with O_DIRECT and offset, count or buf is not a multiple of 512,
+ * O_DIRECT is cleared on the fd around the raw_pread_aligned call.
+ */
+static int raw_pread(BlockDriverState *bs, int64_t offset,
+ uint8_t *buf, int count)
+{
+ BDRVRawState *s = bs->opaque;
+
+ if (unlikely((s->open_flags & O_DIRECT) &&
+ (offset % 512 || count % 512 || (uintptr_t) buf % 512))) {
+
+ int ret;
+
+ // Clear O_DIRECT, read, then restore flags (fcntl results unchecked)
+ fcntl(s->fd, F_SETFL, s->open_flags & ~O_DIRECT);
+ ret = raw_pread_aligned(bs, offset, buf, count);
+ fcntl(s->fd, F_SETFL, s->open_flags);
+
+ return ret;
+
+ } else {
+ return raw_pread_aligned(bs, offset, buf, count);
+ }
+}
+
+/*
+ * offset and count are in bytes and need not be aligned. If the file was
+ * opened with O_DIRECT and offset, count or buf is not a multiple of 512,
+ * O_DIRECT is cleared on the fd around the raw_pwrite_aligned call.
+ */
+static int raw_pwrite(BlockDriverState *bs, int64_t offset,
+ const uint8_t *buf, int count)
+{
+ BDRVRawState *s = bs->opaque;
+
+ if (unlikely((s->open_flags & O_DIRECT) &&
+ (offset % 512 || count % 512 || (uintptr_t) buf % 512))) {
+
+ int ret;
+
+ // Clear O_DIRECT, write, then restore flags (fcntl results unchecked)
+ fcntl(s->fd, F_SETFL, s->open_flags & ~O_DIRECT);
+ ret = raw_pwrite_aligned(bs, offset, buf, count);
+ fcntl(s->fd, F_SETFL, s->open_flags);
+
+ return ret;
+ } else {
+ return raw_pwrite_aligned(bs, offset, buf, count);
+ }
+}
+
+#else
+#define raw_pread raw_pread_aligned
+#define raw_pwrite raw_pwrite_aligned
+#endif
+
+
/***********************************************************/
/* Unix AIO using POSIX AIO */
@@ -402,10 +478,26 @@ static BlockDriverAIOCB *raw_aio_read(Bl
BlockDriverCompletionFunc *cb, void *opaque)
{
RawAIOCB *acb;
+ BDRVRawState *s = bs->opaque;
+
+ /*
+ * If O_DIRECT is used and the buffer is not aligned fall back
+ * to synchronous IO.
+ */
+ if (unlikely((s->open_flags & O_DIRECT) && ((uintptr_t) buf % 512))) {
+ int ret;
+
+ acb = qemu_aio_get(bs, cb, opaque);
+ ret = raw_pread(bs, 512 * sector_num, buf, 512 * nb_sectors);
+ acb->common.cb(acb->common.opaque, ret);
+ qemu_aio_release(acb);
+ return &acb->common;
+ }
acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
if (!acb)
return NULL;
+
if (aio_read(&acb->aiocb) < 0) {
qemu_aio_release(acb);
return NULL;
@@ -418,6 +510,21 @@ static BlockDriverAIOCB *raw_aio_write(B
BlockDriverCompletionFunc *cb, void *opaque)
{
RawAIOCB *acb;
+ BDRVRawState *s = bs->opaque;
+
+ /*
+ * If O_DIRECT is used and the buffer is not aligned fall back
+ * to synchronous IO.
+ */
+ if (unlikely((s->open_flags & O_DIRECT) && ((uintptr_t) buf % 512))) {
+ int ret;
+
+ acb = qemu_aio_get(bs, cb, opaque);
+ ret = raw_pwrite(bs, 512 * sector_num, buf, 512 * nb_sectors);
+ acb->common.cb(acb->common.opaque, ret);
+ qemu_aio_release(acb);
+ return &acb->common;
+ }
acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
if (!acb)
@@ -679,7 +786,7 @@ static int hdev_open(BlockDriverState *b
s->type = FTYPE_CD;
} else if (strstart(filename, "/dev/fd", NULL)) {
s->type = FTYPE_FD;
- s->fd_open_flags = open_flags;
+ s->open_flags = open_flags;
/* open will not fail even if no floppy is inserted */
open_flags |= O_NONBLOCK;
} else if (strstart(filename, "/dev/sg", NULL)) {
@@ -734,7 +841,7 @@ static int fd_open(BlockDriverState *bs)
#endif
return -EIO;
}
- s->fd = open(bs->filename, s->fd_open_flags);
+ s->fd = open(bs->filename, s->open_flags);
if (s->fd < 0) {
s->fd_error_time = qemu_get_clock(rt_clock);
s->fd_got_error = 1;
@@ -831,7 +938,7 @@ static int raw_eject(BlockDriverState *b
close(s->fd);
s->fd = -1;
}
- fd = open(bs->filename, s->fd_open_flags | O_NONBLOCK);
+ fd = open(bs->filename, s->open_flags | O_NONBLOCK);
if (fd >= 0) {
if (ioctl(fd, FDEJECT, 0) < 0)
perror("FDEJECT");
next prev parent reply other threads:[~2008-04-30 12:13 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-04-17 13:31 [Qemu-devel] [PATCH] Align file accesses with cache=off (O_DIRECT) Kevin Wolf
2008-04-28 15:34 ` Kevin Wolf
2008-04-29 9:01 ` Laurent Vivier
2008-04-29 14:49 ` Kevin Wolf
2008-04-29 15:48 ` Laurent Vivier
2008-04-29 16:21 ` Kevin Wolf
2008-04-29 16:48 ` Laurent Vivier
2008-04-30 9:21 ` Kevin Wolf
2008-04-30 9:59 ` Laurent Vivier
2008-04-30 12:08 ` Kevin Wolf [this message]
2008-04-30 14:30 ` Blue Swirl
2008-04-30 21:05 ` Kevin Wolf
2008-05-01 14:35 ` Blue Swirl
2008-05-01 17:55 ` Kevin Wolf
2008-05-06 8:44 ` Kevin Wolf
2008-05-06 9:02 ` Laurent Vivier
2008-05-06 16:42 ` Blue Swirl
2008-05-06 16:56 ` Kevin Wolf
2008-05-06 17:23 ` Blue Swirl
2008-04-30 0:05 ` Jamie Lokier
2008-04-30 0:02 ` Jamie Lokier
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=48186134.7070303@suse.de \
--to=kwolf@suse.de \
--cc=Laurent.Vivier@bull.net \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.