From: Paolo Bonzini <pbonzini@redhat.com>
To: qemu-devel@nongnu.org
Cc: kwolf@redhat.com, pl@kamp.de, ronniesahlberg@gmail.com,
stefanha@redhat.com
Subject: [Qemu-devel] [PATCH v3 15/19] raw-posix: add support for write_zeroes on XFS and block devices
Date: Fri, 22 Nov 2013 13:39:57 +0100 [thread overview]
Message-ID: <1385124001-3576-16-git-send-email-pbonzini@redhat.com> (raw)
In-Reply-To: <1385124001-3576-1-git-send-email-pbonzini@redhat.com>
The code is similar to the implementation of discard and write_zeroes
with UNMAP. However, failure must be propagated up to block.c.
The stale page cache problem, which occurs when the device is not opened with O_DIRECT (i.e. without --cache=none), can be reproduced as follows:
# modprobe scsi-debug lbpws=1 lbprz=1
# ./qemu-io /dev/sdXX
qemu-io> write -P 0xcc 0 2M
qemu-io> write -z 0 1M
qemu-io> read -P 0x00 0 512
Pattern verification failed at offset 0, 512 bytes
qemu-io> read -v 0 512
00000000: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc ................
...
# ./qemu-io --cache=none /dev/sdXX
qemu-io> write -P 0xcc 0 2M
qemu-io> write -z 0 1M
qemu-io> read -P 0x00 0 512
qemu-io> read -v 0 512
00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
...
The same problem occurs with discard instead of "write -z".
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
block/raw-aio.h | 3 +-
block/raw-posix.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++--------
2 files changed, 74 insertions(+), 13 deletions(-)
diff --git a/block/raw-aio.h b/block/raw-aio.h
index c61f159..7ad0a8a 100644
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -21,9 +21,10 @@
#define QEMU_AIO_IOCTL 0x0004
#define QEMU_AIO_FLUSH 0x0008
#define QEMU_AIO_DISCARD 0x0010
+#define QEMU_AIO_WRITE_ZEROES 0x0020
#define QEMU_AIO_TYPE_MASK \
(QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH| \
- QEMU_AIO_DISCARD)
+ QEMU_AIO_DISCARD|QEMU_AIO_WRITE_ZEROES)
/* AIO flags */
#define QEMU_AIO_MISALIGNED 0x1000
diff --git a/block/raw-posix.c b/block/raw-posix.c
index b3feed6..10c6b34 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -142,6 +142,7 @@ typedef struct BDRVRawState {
bool is_xfs:1;
#endif
bool has_discard:1;
+ bool has_write_zeroes:1;
bool discard_zeroes:1;
} BDRVRawState;
@@ -326,6 +327,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
#endif
s->has_discard = true;
+ s->has_write_zeroes = true;
if (fstat(s->fd, &st) < 0) {
error_setg_errno(errp, errno, "Could not stat file");
@@ -344,9 +346,11 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
#ifdef __linux__
/* On Linux 3.10, BLKDISCARD leaves stale data in the page cache. Do
* not rely on the contents of discarded blocks unless using O_DIRECT.
+ * Same for BLKZEROOUT.
*/
if (!(bs->open_flags & BDRV_O_NOCACHE)) {
s->discard_zeroes = false;
+ s->has_write_zeroes = false;
}
#endif
}
@@ -702,6 +706,23 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
}
#ifdef CONFIG_XFS
+static int xfs_write_zeroes(BDRVRawState *s, int64_t offset, uint64_t bytes)
+{
+ struct xfs_flock64 fl;
+
+ memset(&fl, 0, sizeof(fl));
+ fl.l_whence = SEEK_SET;
+ fl.l_start = offset;
+ fl.l_len = bytes;
+
+ if (xfsctl(NULL, s->fd, XFS_IOC_ZERO_RANGE, &fl) < 0) {
+ DEBUG_BLOCK_PRINT("cannot write zero range (%s)\n", strerror(errno));
+ return -errno;
+ }
+
+ return 0;
+}
+
static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes)
{
struct xfs_flock64 fl;
@@ -720,6 +741,42 @@ static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes)
}
#endif
+static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb)
+{
+ int ret = -EOPNOTSUPP;
+ BDRVRawState *s = aiocb->bs->opaque;
+
+ if (s->has_write_zeroes == 0) {
+ return -ENOTSUP;
+ }
+
+ if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
+#ifdef BLKZEROOUT
+ do {
+ uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
+ if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) {
+ return 0;
+ }
+ } while (errno == EINTR);
+
+ ret = -errno;
+#endif
+ } else {
+#ifdef CONFIG_XFS
+ if (s->is_xfs) {
+ return xfs_write_zeroes(s, aiocb->aio_offset, aiocb->aio_nbytes);
+ }
+#endif
+ }
+
+ if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP ||
+ ret == -ENOTTY) {
+ s->has_write_zeroes = false;
+ ret = -ENOTSUP;
+ }
+ return ret;
+}
+
static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
{
int ret = -EOPNOTSUPP;
@@ -804,6 +861,9 @@ static int aio_worker(void *arg)
case QEMU_AIO_DISCARD:
ret = handle_aiocb_discard(aiocb);
break;
+ case QEMU_AIO_WRITE_ZEROES:
+ ret = handle_aiocb_write_zeroes(aiocb);
+ break;
default:
fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
ret = -EINVAL;
@@ -1256,13 +1316,13 @@ static int coroutine_fn raw_co_write_zeroes(
BDRVRawState *s = bs->opaque;
if (!(flags & BDRV_REQ_MAY_UNMAP)) {
- return -ENOTSUP;
- }
- if (!s->discard_zeroes) {
- return -ENOTSUP;
+ return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
+ QEMU_AIO_WRITE_ZEROES);
+ } else if (s->discard_zeroes) {
+ return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
+ QEMU_AIO_DISCARD);
}
- return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
- QEMU_AIO_DISCARD);
+ return -ENOTSUP;
}
static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
@@ -1613,13 +1673,13 @@ static coroutine_fn int hdev_co_write_zeroes(BlockDriverState *bs,
return rc;
}
if (!(flags & BDRV_REQ_MAY_UNMAP)) {
- return -ENOTSUP;
- }
- if (!s->discard_zeroes) {
- return -ENOTSUP;
+ return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
+ QEMU_AIO_WRITE_ZEROES|QEMU_AIO_BLKDEV);
+ } else if (s->discard_zeroes) {
+ return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
+ QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
}
- return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
- QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
+ return -ENOTSUP;
}
static int hdev_create(const char *filename, QEMUOptionParameter *options,
--
1.8.4.2
next prev parent reply other threads:[~2013-11-22 12:41 UTC|newest]
Thread overview: 36+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-11-22 12:39 [Qemu-devel] [PATCH v3 00/19] block & scsi: write_zeroes support through the whole stack Paolo Bonzini
2013-11-22 12:39 ` [Qemu-devel] [PATCH v3 01/19] block: generalize BlockLimits handling to cover bdrv_aio_discard too Paolo Bonzini
2013-11-25 9:09 ` Peter Lieven
2013-11-22 12:39 ` [Qemu-devel] [PATCH v3 02/19] block: add flags to BlockRequest Paolo Bonzini
2013-11-25 9:11 ` Peter Lieven
2013-11-22 12:39 ` [Qemu-devel] [PATCH v3 03/19] block: add flags argument to bdrv_co_write_zeroes tracepoint Paolo Bonzini
2013-11-25 9:12 ` Peter Lieven
2013-11-22 12:39 ` [Qemu-devel] [PATCH v3 04/19] block: add bdrv_aio_write_zeroes Paolo Bonzini
2013-11-25 9:13 ` Peter Lieven
2013-11-22 12:39 ` [Qemu-devel] [PATCH v3 05/19] block: handle ENOTSUP from discard in generic code Paolo Bonzini
2013-11-25 10:06 ` Peter Lieven
2013-11-25 10:16 ` Paolo Bonzini
2013-11-22 12:39 ` [Qemu-devel] [PATCH v3 06/19] block: make bdrv_co_do_write_zeroes stricter in producing aligned requests Paolo Bonzini
2013-11-25 10:23 ` Peter Lieven
2013-11-22 12:39 ` [Qemu-devel] [PATCH v3 07/19] vpc, vhdx: add get_info Paolo Bonzini
2013-11-25 10:27 ` Peter Lieven
2013-11-22 12:39 ` [Qemu-devel] [PATCH v3 08/19] block drivers: add discard/write_zeroes properties to bdrv_get_info implementation Paolo Bonzini
2013-11-25 10:29 ` Peter Lieven
2013-12-03 15:09 ` Kevin Wolf
2013-12-03 15:21 ` Paolo Bonzini
2013-12-03 17:10 ` Peter Lieven
2013-11-22 12:39 ` [Qemu-devel] [PATCH v3 09/19] block drivers: expose requirement for write same alignment from formats Paolo Bonzini
2013-11-25 10:33 ` Peter Lieven
2013-11-22 12:39 ` [Qemu-devel] [PATCH v3 10/19] block/iscsi: remove .bdrv_has_zero_init Paolo Bonzini
2013-11-22 12:39 ` [Qemu-devel] [PATCH v3 11/19] block/iscsi: updated copyright Paolo Bonzini
2013-11-22 12:39 ` [Qemu-devel] [PATCH v3 12/19] block/iscsi: check WRITE SAME support differently depending on MAY_UNMAP Paolo Bonzini
2013-11-25 10:34 ` Peter Lieven
2013-11-25 10:42 ` Paolo Bonzini
2013-11-22 12:39 ` [Qemu-devel] [PATCH v3 13/19] raw-posix: implement write_zeroes with MAY_UNMAP for files Paolo Bonzini
2013-11-22 12:39 ` [Qemu-devel] [PATCH v3 14/19] raw-posix: implement write_zeroes with MAY_UNMAP for block devices Paolo Bonzini
2013-11-22 12:39 ` Paolo Bonzini [this message]
2013-11-22 12:39 ` [Qemu-devel] [PATCH v3 16/19] qemu-iotests: 033 is fast Paolo Bonzini
2013-11-22 12:39 ` [Qemu-devel] [PATCH v3 17/19] scsi-disk: catch write protection errors in UNMAP Paolo Bonzini
2013-11-22 12:40 ` [Qemu-devel] [PATCH v3 18/19] scsi-disk: reject ANCHOR=1 for UNMAP and WRITE SAME commands Paolo Bonzini
2013-11-22 12:40 ` [Qemu-devel] [PATCH v3 19/19] scsi-disk: correctly implement WRITE SAME Paolo Bonzini
2013-12-03 14:29 ` [Qemu-devel] [PATCH v3 00/19] block & scsi: write_zeroes support through the whole stack Stefan Hajnoczi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1385124001-3576-16-git-send-email-pbonzini@redhat.com \
--to=pbonzini@redhat.com \
--cc=kwolf@redhat.com \
--cc=pl@kamp.de \
--cc=qemu-devel@nongnu.org \
--cc=ronniesahlberg@gmail.com \
--cc=stefanha@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).