qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Stefan Hajnoczi <stefanha@gmail.com>
To: Christoph Hellwig <hch@lst.de>
Cc: Jan Kiszka <jan.kiszka@siemens.com>, qemu-devel <qemu-devel@nongnu.org>
Subject: Re: [Qemu-devel] Guest latency issues due to bdrv_check_byte_request
Date: Sat, 17 Apr 2010 22:32:21 +0100	[thread overview]
Message-ID: <y2hfbd9d3991004171432zdbe02ee9id669a6f2551000b8@mail.gmail.com> (raw)
In-Reply-To: <20100417194009.GA13862@lst.de>

Thanks Christoph.

Cached getlength with pread/pwrite:
% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 96.97    1.760111       11893       148         4 futex
  1.61    0.029209           1     46891      2217 select
  0.28    0.005047           0     64609           timer_gettime
  0.22    0.004059           0     42745      2578 rt_sigreturn
  0.22    0.003911           0     46261           timer_settime
  0.18    0.003280        1093         3           shmdt
  0.17    0.003095           0     23859           pread  <---
  0.17    0.003061           0     42800           write
  0.16    0.002916           0     47759      5151 read
  0.02    0.000285           0       645           writev
[...]
  0.00    0.000000           0        13           lseek

Note that this workload is a Tiny Core Linux boot from disk followed by a
shutdown. It is not very I/O intensive: it only loads a kernel and a ~10 MB
initramfs, and barely touches the disk after the kernel has loaded.

diff --git a/block.c b/block.c
index 0f6be17..5c1652c 100644
--- a/block.c
+++ b/block.c
@@ -363,6 +363,7 @@ static int bdrv_open_common(BlockDriverState *bs, const char *filename,
     assert(drv != NULL);

     bs->file = NULL;
+    bs->total_sectors = 0;
     bs->is_temporary = 0;
     bs->encrypted = 0;
     bs->valid_key = 0;
@@ -416,9 +417,7 @@ static int bdrv_open_common(BlockDriverState *bs, const char *filename,
     }

     bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
-    if (drv->bdrv_getlength) {
-        bs->total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
-    }
+    bs->total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
 #ifndef _WIN32
     if (bs->is_temporary) {
         unlink(filename);
@@ -957,13 +956,26 @@ int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
 {
     BlockDriver *drv = bs->drv;
+    int ret;
     if (!drv)
         return -ENOMEDIUM;
     if (!drv->bdrv_truncate)
         return -ENOTSUP;
     if (bs->read_only)
         return -EACCES;
-    return drv->bdrv_truncate(bs, offset);
+    ret = drv->bdrv_truncate(bs, offset);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* refresh total sectors */
+    if (drv->bdrv_getlength) {
+        bs->total_sectors = 0; /* discard cached value */
+        bs->total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
+    } else {
+        bs->total_sectors = offset >> BDRV_SECTOR_BITS;
+    }
+    return ret;
 }

 /**
@@ -974,8 +986,12 @@ int64_t bdrv_getlength(BlockDriverState *bs)
     BlockDriver *drv = bs->drv;
     if (!drv)
         return -ENOMEDIUM;
-    if (!drv->bdrv_getlength) {
-        /* legacy mode */
+
+    /* Fixed size devices use the total_sectors value for speed instead of
+       issuing a length query (like lseek) on each call.  Also, legacy block
+       drivers don't provide a bdrv_getlength function and must use
+       total_sectors. */
+    if ((bs->total_sectors && !bs->growable) || !drv->bdrv_getlength) {
         return bs->total_sectors * BDRV_SECTOR_SIZE;
     }
     return drv->bdrv_getlength(bs);
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 598ea19..7541ed2 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -105,7 +105,6 @@
 typedef struct BDRVRawState {
     int fd;
     int type;
-    unsigned int lseek_err_cnt;
     int open_flags;
 #if defined(__linux__)
     /* linux floppy specific */
@@ -134,8 +133,6 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
     BDRVRawState *s = bs->opaque;
     int fd, ret;

-    s->lseek_err_cnt = 0;
-
     s->open_flags = open_flags | O_BINARY;
     s->open_flags &= ~O_ACCMODE;
     if (bdrv_flags & BDRV_O_RDWR) {
@@ -243,19 +240,7 @@ static int raw_pread_aligned(BlockDriverState *bs, int64_t offset,
     if (ret < 0)
         return ret;

-    if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
-        ++(s->lseek_err_cnt);
-        if(s->lseek_err_cnt <= 10) {
-            DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
-                              "] lseek failed : %d = %s\n",
-                              s->fd, bs->filename, offset, buf, count,
-                              bs->total_sectors, errno, strerror(errno));
-        }
-        return -1;
-    }
-    s->lseek_err_cnt=0;
-
-    ret = read(s->fd, buf, count);
+    ret = pread(s->fd, buf, count, offset);
     if (ret == count)
         goto label__raw_read__success;

@@ -276,12 +261,10 @@ static int raw_pread_aligned(BlockDriverState *bs, int64_t offset,

     /* Try harder for CDrom. */
     if (bs->type == BDRV_TYPE_CDROM) {
-        lseek(s->fd, offset, SEEK_SET);
-        ret = read(s->fd, buf, count);
+        ret = pread(s->fd, buf, count, offset);
         if (ret == count)
             goto label__raw_read__success;
-        lseek(s->fd, offset, SEEK_SET);
-        ret = read(s->fd, buf, count);
+        ret = pread(s->fd, buf, count, offset);
         if (ret == count)
             goto label__raw_read__success;

@@ -313,19 +296,7 @@ static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset,
     if (ret < 0)
         return -errno;

-    if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
-        ++(s->lseek_err_cnt);
-        if(s->lseek_err_cnt) {
-            DEBUG_BLOCK_PRINT("raw_pwrite(%d:%s, %" PRId64 ", %p, %d) [%"
-                              PRId64 "] lseek failed : %d = %s\n",
-                              s->fd, bs->filename, offset, buf, count,
-                              bs->total_sectors, errno, strerror(errno));
-        }
-        return -EIO;
-    }
-    s->lseek_err_cnt = 0;
-
-    ret = write(s->fd, buf, count);
+    ret = pwrite(s->fd, buf, count, offset);
     if (ret == count)
         goto label__raw_write__success;

Stefan

  reply	other threads:[~2010-04-17 21:32 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-04-15 18:15 [Qemu-devel] Guest latency issues due to bdrv_check_byte_request Jan Kiszka
2010-04-17 19:05 ` Stefan Hajnoczi
2010-04-17 19:40   ` Christoph Hellwig
2010-04-17 21:32     ` Stefan Hajnoczi [this message]
2010-04-18 17:37       ` Christoph Hellwig
2010-04-18 18:05       ` [Qemu-devel] " Jan Kiszka

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=y2hfbd9d3991004171432zdbe02ee9id669a6f2551000b8@mail.gmail.com \
    --to=stefanha@gmail.com \
    --cc=hch@lst.de \
    --cc=jan.kiszka@siemens.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).