From: Kevin Wolf <kwolf@redhat.com>
To: qemu-devel@nongnu.org
Cc: kwolf@redhat.com, pbonzini@redhat.com, armbru@redhat.com,
stefanha@redhat.com
Subject: [Qemu-devel] [RFC PATCH 05/19] raw: Probe required direct I/O alignment
Date: Fri, 6 Dec 2013 18:22:46 +0100 [thread overview]
Message-ID: <1386350580-5666-6-git-send-email-kwolf@redhat.com> (raw)
In-Reply-To: <1386350580-5666-1-git-send-email-kwolf@redhat.com>
From: Paolo Bonzini <pbonzini@redhat.com>
Add a bs->request_alignment field that contains the required
offset/length alignment for I/O requests and fill it in the raw block
drivers. Use ioctls if possible, else see what alignment it takes for
O_DIRECT to succeed.
While at it, also expose the memory alignment requirements, which may be
(and in practice are) different from the disk alignment requirements.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block.c | 4 ++
block/raw-posix.c | 102 ++++++++++++++++++++++++++++++++++++++--------
block/raw-win32.c | 41 +++++++++++++++++++
include/block/block_int.h | 3 ++
4 files changed, 132 insertions(+), 18 deletions(-)
diff --git a/block.c b/block.c
index 34fedf0..b4b17fd 100644
--- a/block.c
+++ b/block.c
@@ -789,6 +789,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
bs->open_flags = flags;
bs->guest_block_size = 512;
+ bs->request_alignment = 512;
bs->zero_beyond_eof = true;
open_flags = bdrv_open_flags(bs, flags);
bs->read_only = !(open_flags & BDRV_O_RDWR);
@@ -856,6 +857,9 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
goto free_and_fail;
}
+ assert(bdrv_opt_mem_align(bs) != 0);
+ assert(bs->request_alignment != 0);
+
#ifndef _WIN32
if (bs->is_temporary) {
assert(bs->filename[0] != '\0');
diff --git a/block/raw-posix.c b/block/raw-posix.c
index f836c8e..732aff5 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -127,6 +127,8 @@ typedef struct BDRVRawState {
int fd;
int type;
int open_flags;
+ size_t buf_align;
+
#if defined(__linux__)
/* linux floppy specific */
int64_t fd_open_time;
@@ -211,6 +213,76 @@ static int raw_normalize_devicepath(const char **filename)
}
#endif
+static void raw_probe_alignment(BlockDriverState *bs)
+{
+ BDRVRawState *s = bs->opaque;
+ char *buf;
+ unsigned int sector_size;
+
+ /* For /dev/sg devices the alignment is not really used.
+ With buffered I/O, we don't have any restrictions. */
+ if (bs->sg || !(s->open_flags & O_DIRECT)) {
+ bs->request_alignment = 1;
+ s->buf_align = 1;
+ return;
+ }
+
+ /* Try a few ioctls to get the right size */
+ bs->request_alignment = 0;
+ s->buf_align = 0;
+
+#ifdef BLKSSZGET
+ if (ioctl(s->fd, BLKSSZGET, §or_size) >= 0) {
+ bs->request_alignment = sector_size;
+ }
+#endif
+#ifdef DKIOCGETBLOCKSIZE
+ if (ioctl(s->fd, DKIOCGETBLOCKSIZE, §or_size) >= 0) {
+ bs->request_alignment = sector_size;
+ }
+#endif
+#ifdef DIOCGSECTORSIZE
+ if (ioctl(s->fd, DIOCGSECTORSIZE, §or_size) >= 0) {
+ bs->request_alignment = sector_size;
+ }
+#endif
+#ifdef CONFIG_XFS
+ if (s->is_xfs) {
+ struct dioattr da;
+ if (xfsctl(NULL, s->fd, XFS_IOC_DIOINFO, &da) >= 0) {
+ bs->request_alignment = da.d_miniosz;
+ /* The kernel returns wrong information for d_mem */
+ /* s->buf_align = da.d_mem; */
+ }
+ }
+#endif
+
+ /* If we could not get the sizes so far, we can only guess them */
+ if (!s->buf_align) {
+ size_t align;
+ buf = qemu_memalign(MAX_BLOCKSIZE, 2 * MAX_BLOCKSIZE);
+ for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
+ if (pread(s->fd, buf + align, MAX_BLOCKSIZE, 0) >= 0) {
+ s->buf_align = align;
+ break;
+ }
+ }
+ qemu_vfree(buf);
+ }
+
+ if (!bs->request_alignment) {
+ size_t align;
+ buf = qemu_memalign(s->buf_align, MAX_BLOCKSIZE);
+ for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
+ if (pread(s->fd, buf, align, 0) >= 0) {
+ bs->request_alignment = align;
+ break;
+ }
+ }
+ qemu_vfree(buf);
+ }
+}
+
static void raw_parse_flags(int bdrv_flags, int *open_flags)
{
assert(open_flags != NULL);
@@ -330,6 +402,8 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
}
#endif
+ raw_probe_alignment(bs);
+
ret = 0;
fail:
qemu_opts_del(opts);
@@ -432,7 +506,6 @@ static int raw_reopen_prepare(BDRVReopenState *state,
return ret;
}
-
static void raw_reopen_commit(BDRVReopenState *state)
{
BDRVRawReopenState *raw_s = state->opaque;
@@ -468,23 +541,11 @@ static void raw_reopen_abort(BDRVReopenState *state)
state->opaque = NULL;
}
-
-/* XXX: use host sector size if necessary with:
-#ifdef DIOCGSECTORSIZE
- {
- unsigned int sectorsize = 512;
- if (!ioctl(fd, DIOCGSECTORSIZE, §orsize) &&
- sectorsize > bufsize)
- bufsize = sectorsize;
- }
-#endif
-#ifdef CONFIG_COCOA
- uint32_t blockSize = 512;
- if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) {
- bufsize = blockSize;
- }
-#endif
-*/
+static int raw_opt_mem_align(BlockDriverState *bs)
+{
+ BDRVRawState *s = bs->opaque;
+ return s->buf_align;
+}
static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
{
@@ -1227,6 +1288,7 @@ static BlockDriver bdrv_file = {
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = raw_aio_discard,
+ .bdrv_opt_mem_align = raw_opt_mem_align,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -1582,6 +1644,7 @@ static BlockDriver bdrv_host_device = {
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = hdev_aio_discard,
+ .bdrv_opt_mem_align = raw_opt_mem_align,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -1712,6 +1775,7 @@ static BlockDriver bdrv_host_floppy = {
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
+ .bdrv_opt_mem_align = raw_opt_mem_align,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -1822,6 +1886,7 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
+ .bdrv_opt_mem_align = raw_opt_mem_align,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -1951,6 +2016,7 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
+ .bdrv_opt_mem_align = raw_opt_mem_align,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
diff --git a/block/raw-win32.c b/block/raw-win32.c
index 2bad5a3..6fe64d2 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -202,6 +202,35 @@ static int set_sparse(int fd)
NULL, 0, NULL, 0, &returned, NULL);
}
+static void raw_probe_alignment(BlockDriverState *bs)
+{
+ BDRVRawState *s = bs->opaque;
+ DWORD sectorsPerCluster, freeClusters, totalClusters, count;
+ DISK_GEOMETRY_EX dg;
+ BOOL status;
+
+ if (s->type == FTYPE_CD) {
+ bs->request_alignment = 2048;
+ return;
+ }
+ if (s->type == FTYPE_HARDDISK) {
+ status = DeviceIoControl(s->hfile, IOCTL_DISK_GET_DRIVE_GEOMETRY_EX,
+ NULL, 0, &dg, sizeof(dg), &count, NULL);
+ if (status != 0) {
+ bs->request_alignment = dg.Geometry.BytesPerSector;
+ return;
+ }
+ /* try GetDiskFreeSpace too */
+ }
+
+ if (s->drive_path[0]) {
+ GetDiskFreeSpace(s->drive_path, §orsPerCluster,
+ &dg.Geometry.BytesPerSector,
+ &freeClusters, &totalClusters);
+ bs->request_alignment = dg.Geometry.BytesPerSector;
+ }
+}
+
static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped)
{
assert(access_flags != NULL);
@@ -269,6 +298,17 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
}
}
+ if (filename[0] && filename[1] == ':') {
+ snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", filename[0]);
+ } else if (filename[0] == '\\' && filename[1] == '\\') {
+ s->drive_path[0] = 0;
+ } else {
+ /* Relative path. */
+ char buf[MAX_PATH];
+ GetCurrentDirectory(MAX_PATH, buf);
+ snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", buf[0]);
+ }
+
s->hfile = CreateFile(filename, access_flags,
FILE_SHARE_READ, NULL,
OPEN_EXISTING, overlapped, NULL);
@@ -293,6 +333,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
s->aio = aio;
}
+ raw_probe_alignment(bs);
ret = 0;
fail:
qemu_opts_del(opts);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 760cfc5..31ce7d4 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -293,6 +293,9 @@ struct BlockDriverState {
/* Whether produces zeros when read beyond eof */
bool zero_beyond_eof;
+ /* Alignment requirement for offset/length of I/O requests */
+ unsigned int request_alignment;
+
/* the block size for which the guest device expects atomicity */
int guest_block_size;
--
1.8.1.4
next prev parent reply other threads:[~2013-12-06 17:23 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-12-06 17:22 [Qemu-devel] [RFC PATCH 00/19] block: Support for 512b-on-4k emulation Kevin Wolf
2013-12-06 17:22 ` [Qemu-devel] [RFC PATCH 01/19] qemu_memalign: Allow small alignments Kevin Wolf
2013-12-06 17:22 ` [Qemu-devel] [RFC PATCH 02/19] block: Detect unaligned length in bdrv_qiov_is_aligned() Kevin Wolf
2013-12-06 19:12 ` Eric Blake
2013-12-06 17:22 ` [Qemu-devel] [RFC PATCH 03/19] block: Don't use guest sector size for qemu_blockalign() Kevin Wolf
2013-12-10 3:18 ` Wenchao Xia
2013-12-10 9:42 ` Kevin Wolf
2013-12-11 2:43 ` Wenchao Xia
2013-12-06 17:22 ` [Qemu-devel] [RFC PATCH 04/19] block: rename buffer_alignment to guest_block_size Kevin Wolf
2013-12-10 3:25 ` Wenchao Xia
2013-12-06 17:22 ` Kevin Wolf [this message]
2013-12-06 17:53 ` [Qemu-devel] [RFC PATCH 05/19] raw: Probe required direct I/O alignment Paolo Bonzini
2013-12-09 12:58 ` Kevin Wolf
2013-12-09 13:40 ` Paolo Bonzini
2013-12-06 17:22 ` [Qemu-devel] [RFC PATCH 06/19] block: Introduce bdrv_aligned_preadv() Kevin Wolf
2013-12-06 17:22 ` [Qemu-devel] [RFC PATCH 07/19] block: Introduce bdrv_co_do_preadv() Kevin Wolf
2013-12-06 17:22 ` [Qemu-devel] [RFC PATCH 08/19] block: Introduce bdrv_aligned_pwritev() Kevin Wolf
2013-12-06 17:22 ` [Qemu-devel] [RFC PATCH 09/19] block: write: Handle COR dependency after I/O throttling Kevin Wolf
2013-12-06 17:22 ` [Qemu-devel] [RFC PATCH 10/19] block: Introduce bdrv_co_do_pwritev() Kevin Wolf
2013-12-06 17:22 ` [Qemu-devel] [RFC PATCH 11/19] block: Switch BdrvTrackedRequest to byte granularity Kevin Wolf
2013-12-06 17:22 ` [Qemu-devel] [RFC PATCH 12/19] block: Allow waiting for overlapping requests between begin/end Kevin Wolf
2013-12-06 17:22 ` [Qemu-devel] [RFC PATCH 13/19] block: Make zero-after-EOF work with larger alignment Kevin Wolf
2013-12-06 17:22 ` [Qemu-devel] [RFC PATCH 14/19] block: Generalise and optimise COR serialisation Kevin Wolf
2013-12-06 17:22 ` [Qemu-devel] [RFC PATCH 15/19] block: Make overlap range for serialisation dynamic Kevin Wolf
2013-12-06 17:22 ` [Qemu-devel] [RFC PATCH 16/19] block: Align requests in bdrv_co_do_pwritev() Kevin Wolf
2013-12-06 17:22 ` [Qemu-devel] [RFC PATCH 17/19] block: Change coroutine wrapper to byte granularity Kevin Wolf
2013-12-06 17:22 ` [Qemu-devel] [RFC PATCH 18/19] block: Make bdrv_pread() a bdrv_prwv_co() wrapper Kevin Wolf
2013-12-06 17:23 ` [Qemu-devel] [RFC PATCH 19/19] block: Make bdrv_pwrite() " Kevin Wolf
2013-12-06 17:55 ` [Qemu-devel] [RFC PATCH 00/19] block: Support for 512b-on-4k emulation Paolo Bonzini
2013-12-09 11:16 ` Kevin Wolf
2013-12-09 12:51 ` Stefan Hajnoczi
2013-12-09 13:02 ` Kevin Wolf
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1386350580-5666-6-git-send-email-kwolf@redhat.com \
--to=kwolf@redhat.com \
--cc=armbru@redhat.com \
--cc=pbonzini@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=stefanha@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.