qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Kevin Wolf <kwolf@suse.de>
To: Laurent Vivier <Laurent.Vivier@bull.net>
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH] Align file accesses with cache=off	(O_DIRECT)
Date: Tue, 29 Apr 2008 16:49:53 +0200	[thread overview]
Message-ID: <48173591.9010609@suse.de> (raw)
In-Reply-To: <1209459667.4328.7.camel@frecb07144>

[-- Attachment #1: Type: text/plain, Size: 964 bytes --]

Hi Laurent,

Laurent Vivier schrieb:
> But if we want too keep simplicity without memcpy(), we could only
> de-activate O_DIRECT on pread() or pwrite() :

You're right, this approach is simpler, better and makes the patch 
smaller. I've attached a new version of the patch.

However, now that you've pointed me to your patch I realize that my 
patch might be simple but it is incomplete as well. Normal read/write 
operation on qcow images should work fine (only metadata is unaligned 
and there pread is used). Snapshots don't work though because they end 
up in aio requests which are not routed to raw_pread.

Disabling O_DIRECT for a single aio request is impossible (after all, 
aio is asynchronous), and disabling it for at least one aio request is 
going to be ugly. So maybe we better turn O_DIRECT off for snapsnot 
saving/loading, even if it's not the generic fix I wanted to have when I 
started.

I'm still undecided, though. What do you think?

Kevin

[-- Attachment #2: align-odirect-accesses.patch --]
[-- Type: text/x-patch, Size: 3620 bytes --]

Index: block-raw-posix.c
===================================================================
--- block-raw-posix.c.orig
+++ block-raw-posix.c
@@ -77,6 +77,7 @@
 typedef struct BDRVRawState {
     int fd;
     int type;
+    int flags;
     unsigned int lseek_err_cnt;
 #if defined(__linux__)
     /* linux floppy specific */
@@ -95,6 +96,7 @@ static int raw_open(BlockDriverState *bs
     BDRVRawState *s = bs->opaque;
     int fd, open_flags, ret;
 
+    s->flags = flags;
     s->lseek_err_cnt = 0;
 
     open_flags = O_BINARY;
@@ -141,7 +143,14 @@ static int raw_open(BlockDriverState *bs
 #endif
 */
 
-static int raw_pread(BlockDriverState *bs, int64_t offset,
+/* 
+ * offset and count are in bytes, but must be multiples of 512 for files 
+ * opened with O_DIRECT. buf must be aligned to 512 bytes then.
+ *
+ * This function may be called without alignment if the caller ensures
+ * that O_DIRECT is not in effect.
+ */
+static int raw_pread_aligned(BlockDriverState *bs, int64_t offset,
                      uint8_t *buf, int count)
 {
     BDRVRawState *s = bs->opaque;
@@ -194,7 +203,14 @@ label__raw_read__success:
     return ret;
 }
 
-static int raw_pwrite(BlockDriverState *bs, int64_t offset,
+/* 
+ * offset and count are in bytes, but must be multiples of 512 for files 
+ * opened with O_DIRECT. buf must be aligned to 512 bytes then.
+ *
+ * This function may be called without alignment if the caller ensures
+ * that O_DIRECT is not in effect.
+ */
+static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset,
                       const uint8_t *buf, int count)
 {
     BDRVRawState *s = bs->opaque;
@@ -230,6 +246,69 @@ label__raw_write__success:
     return ret;
 }
 
+
+#ifdef O_DIRECT
+/* 
+ * offset and count are in bytes and possibly not aligned. For files opened 
+ * with O_DIRECT, necessary alignments are ensured before calling 
+ * raw_pread_aligned to do the actual read.
+ */
+static int raw_pread(BlockDriverState *bs, int64_t offset,
+                     uint8_t *buf, int count)
+{
+    BDRVRawState *s = bs->opaque;
+
+    if (unlikely((s->flags & BDRV_O_DIRECT) &&
+            (offset % 512 != 0 || (uintptr_t) buf % 512))) {
+
+        int flags, ret;
+
+        // Temporarily disable O_DIRECT for unaligned access
+        flags = fcntl(s->fd, F_GETFL);
+        fcntl(s->fd, F_SETFL, flags & ~O_DIRECT);
+        ret = raw_pread_aligned(bs, offset, buf, count);
+        fcntl(s->fd, F_SETFL, flags);
+
+        return ret;
+
+    } else {
+        return raw_pread_aligned(bs, offset, buf, count);
+    }
+}
+
+/* 
+ * offset and count are in bytes and possibly not aligned. For files opened 
+ * with O_DIRECT, necessary alignments are ensured before calling 
+ * raw_pwrite_aligned to do the actual write.
+ */
+static int raw_pwrite(BlockDriverState *bs, int64_t offset,
+                      const uint8_t *buf, int count)
+{
+    BDRVRawState *s = bs->opaque;
+
+    if (unlikely((s->flags & BDRV_O_DIRECT) &&
+            (offset % 512 != 0 || (uintptr_t) buf % 512))) {
+
+        int flags, ret;
+
+        // Temporarily disable O_DIRECT for unaligned access
+        flags = fcntl(s->fd, F_GETFL);
+        fcntl(s->fd, F_SETFL, flags & ~O_DIRECT);
+        ret = raw_pwrite_aligned(bs, offset, buf, count);
+        fcntl(s->fd, F_SETFL, flags);
+
+        return ret;
+    } else {
+        return raw_pwrite_aligned(bs, offset, buf, count);
+    }
+}
+
+#else
+#define raw_pread raw_pread_aligned
+#define raw_pwrite raw_pwrite_aligned
+#endif
+
+
 /***********************************************************/
 /* Unix AIO using POSIX AIO */
 

  reply	other threads:[~2008-04-29 14:54 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-04-17 13:31 [Qemu-devel] [PATCH] Align file accesses with cache=off (O_DIRECT) Kevin Wolf
2008-04-28 15:34 ` Kevin Wolf
2008-04-29  9:01   ` Laurent Vivier
2008-04-29 14:49     ` Kevin Wolf [this message]
2008-04-29 15:48       ` Laurent Vivier
2008-04-29 16:21         ` Kevin Wolf
2008-04-29 16:48           ` Laurent Vivier
2008-04-30  9:21             ` Kevin Wolf
2008-04-30  9:59               ` Laurent Vivier
2008-04-30 12:08                 ` Kevin Wolf
2008-04-30 14:30                   ` Blue Swirl
2008-04-30 21:05                     ` Kevin Wolf
2008-05-01 14:35                       ` Blue Swirl
2008-05-01 17:55                         ` Kevin Wolf
2008-05-06  8:44                           ` Kevin Wolf
2008-05-06  9:02                             ` Laurent Vivier
2008-05-06 16:42                             ` Blue Swirl
2008-05-06 16:56                               ` Kevin Wolf
2008-05-06 17:23                                 ` Blue Swirl
2008-04-30  0:05           ` Jamie Lokier
2008-04-30  0:02       ` Jamie Lokier

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=48173591.9010609@suse.de \
    --to=kwolf@suse.de \
    --cc=Laurent.Vivier@bull.net \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).