From: Kevin Wolf <kwolf@redhat.com>
To: qemu-devel@nongnu.org
Cc: kwolf@redhat.com, pbonzini@redhat.com, pl@kamp.de, stefanha@redhat.com
Subject: [Qemu-devel] [PATCH 08/22] raw: Probe required direct I/O alignment
Date: Wed, 11 Dec 2013 22:08:15 +0100 [thread overview]
Message-ID: <1386796109-15264-9-git-send-email-kwolf@redhat.com> (raw)
In-Reply-To: <1386796109-15264-1-git-send-email-kwolf@redhat.com>
From: Paolo Bonzini <pbonzini@redhat.com>
Add a bs->request_alignment field that contains the required
offset/length alignment for I/O requests and fill it in the raw block
drivers. Use ioctls if possible, else see what alignment it takes for
O_DIRECT to succeed.
While at it, also expose the memory alignment requirements, which may be
(and in practice are) different from the disk alignment requirements.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block.c | 3 ++
block/raw-posix.c | 102 ++++++++++++++++++++++++++++++++++++++--------
block/raw-win32.c | 41 +++++++++++++++++++
include/block/block_int.h | 3 ++
4 files changed, 132 insertions(+), 17 deletions(-)
diff --git a/block.c b/block.c
index 3504d17..b86e754 100644
--- a/block.c
+++ b/block.c
@@ -813,6 +813,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
bs->open_flags = flags;
bs->guest_block_size = 512;
+ bs->request_alignment = 512;
bs->zero_beyond_eof = true;
open_flags = bdrv_open_flags(bs, flags);
bs->read_only = !(open_flags & BDRV_O_RDWR);
@@ -881,6 +882,8 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
}
bdrv_refresh_limits(bs);
+ assert(bdrv_opt_mem_align(bs) != 0);
+ assert(bs->request_alignment != 0);
#ifndef _WIN32
if (bs->is_temporary) {
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 10c6b34..e8e75a7 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -127,6 +127,8 @@ typedef struct BDRVRawState {
int fd;
int type;
int open_flags;
+ size_t buf_align;
+
#if defined(__linux__)
/* linux floppy specific */
int64_t fd_open_time;
@@ -213,6 +215,76 @@ static int raw_normalize_devicepath(const char **filename)
}
#endif
+static void raw_probe_alignment(BlockDriverState *bs)
+{
+ BDRVRawState *s = bs->opaque;
+ char *buf;
+ unsigned int sector_size;
+
+ /* For /dev/sg devices the alignment is not really used.
+ With buffered I/O, we don't have any restrictions. */
+ if (bs->sg || !(s->open_flags & O_DIRECT)) {
+ bs->request_alignment = 1;
+ s->buf_align = 1;
+ return;
+ }
+
+ /* Try a few ioctls to get the right size */
+ bs->request_alignment = 0;
+ s->buf_align = 0;
+
+#ifdef BLKSSZGET
+ if (ioctl(s->fd, BLKSSZGET, §or_size) >= 0) {
+ bs->request_alignment = sector_size;
+ }
+#endif
+#ifdef DKIOCGETBLOCKSIZE
+ if (ioctl(s->fd, DKIOCGETBLOCKSIZE, §or_size) >= 0) {
+ bs->request_alignment = sector_size;
+ }
+#endif
+#ifdef DIOCGSECTORSIZE
+ if (ioctl(s->fd, DIOCGSECTORSIZE, §or_size) >= 0) {
+ bs->request_alignment = sector_size;
+ }
+#endif
+#ifdef CONFIG_XFS
+ if (s->is_xfs) {
+ struct dioattr da;
+ if (xfsctl(NULL, s->fd, XFS_IOC_DIOINFO, &da) >= 0) {
+ bs->request_alignment = da.d_miniosz;
+ /* The kernel returns wrong information for d_mem */
+ /* s->buf_align = da.d_mem; */
+ }
+ }
+#endif
+
+ /* If we could not get the sizes so far, we can only guess them */
+ if (!s->buf_align) {
+ size_t align;
+ buf = qemu_memalign(MAX_BLOCKSIZE, 2 * MAX_BLOCKSIZE);
+ for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
+ if (pread(s->fd, buf + align, MAX_BLOCKSIZE, 0) >= 0) {
+ s->buf_align = align;
+ break;
+ }
+ }
+ qemu_vfree(buf);
+ }
+
+ if (!bs->request_alignment) {
+ size_t align;
+ buf = qemu_memalign(s->buf_align, MAX_BLOCKSIZE);
+ for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
+ if (pread(s->fd, buf, align, 0) >= 0) {
+ bs->request_alignment = align;
+ break;
+ }
+ }
+ qemu_vfree(buf);
+ }
+}
+
static void raw_parse_flags(int bdrv_flags, int *open_flags)
{
assert(open_flags != NULL);
@@ -463,7 +535,6 @@ static int raw_reopen_prepare(BDRVReopenState *state,
return ret;
}
-
static void raw_reopen_commit(BDRVReopenState *state)
{
BDRVRawReopenState *raw_s = state->opaque;
@@ -499,23 +570,15 @@ static void raw_reopen_abort(BDRVReopenState *state)
state->opaque = NULL;
}
+static int raw_refresh_limits(BlockDriverState *bs)
+{
+ BDRVRawState *s = bs->opaque;
-/* XXX: use host sector size if necessary with:
-#ifdef DIOCGSECTORSIZE
- {
- unsigned int sectorsize = 512;
- if (!ioctl(fd, DIOCGSECTORSIZE, §orsize) &&
- sectorsize > bufsize)
- bufsize = sectorsize;
- }
-#endif
-#ifdef CONFIG_COCOA
- uint32_t blockSize = 512;
- if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) {
- bufsize = blockSize;
- }
-#endif
-*/
+ raw_probe_alignment(bs);
+ bs->bl.opt_mem_alignment = s->buf_align;
+
+ return 0;
+}
static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
{
@@ -1363,6 +1426,7 @@ static BlockDriver bdrv_file = {
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = raw_aio_discard,
+ .bdrv_refresh_limits = raw_refresh_limits,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -1740,6 +1804,7 @@ static BlockDriver bdrv_host_device = {
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = hdev_aio_discard,
+ .bdrv_refresh_limits = raw_refresh_limits,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -1871,6 +1936,7 @@ static BlockDriver bdrv_host_floppy = {
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
+ .bdrv_refresh_limits = raw_refresh_limits,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -1981,6 +2047,7 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
+ .bdrv_refresh_limits = raw_refresh_limits,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -2110,6 +2177,7 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
+ .bdrv_refresh_limits = raw_refresh_limits,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
diff --git a/block/raw-win32.c b/block/raw-win32.c
index 2bad5a3..6fe64d2 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -202,6 +202,35 @@ static int set_sparse(int fd)
NULL, 0, NULL, 0, &returned, NULL);
}
+static void raw_probe_alignment(BlockDriverState *bs)
+{
+ BDRVRawState *s = bs->opaque;
+ DWORD sectorsPerCluster, freeClusters, totalClusters, count;
+ DISK_GEOMETRY_EX dg;
+ BOOL status;
+
+ if (s->type == FTYPE_CD) {
+ bs->request_alignment = 2048;
+ return;
+ }
+ if (s->type == FTYPE_HARDDISK) {
+ status = DeviceIoControl(s->hfile, IOCTL_DISK_GET_DRIVE_GEOMETRY_EX,
+ NULL, 0, &dg, sizeof(dg), &count, NULL);
+ if (status != 0) {
+ bs->request_alignment = dg.Geometry.BytesPerSector;
+ return;
+ }
+ /* try GetDiskFreeSpace too */
+ }
+
+ if (s->drive_path[0]) {
+ GetDiskFreeSpace(s->drive_path, §orsPerCluster,
+ &dg.Geometry.BytesPerSector,
+ &freeClusters, &totalClusters);
+ bs->request_alignment = dg.Geometry.BytesPerSector;
+ }
+}
+
static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped)
{
assert(access_flags != NULL);
@@ -269,6 +298,17 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
}
}
+ if (filename[0] && filename[1] == ':') {
+ snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", filename[0]);
+ } else if (filename[0] == '\\' && filename[1] == '\\') {
+ s->drive_path[0] = 0;
+ } else {
+ /* Relative path. */
+ char buf[MAX_PATH];
+ GetCurrentDirectory(MAX_PATH, buf);
+ snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", buf[0]);
+ }
+
s->hfile = CreateFile(filename, access_flags,
FILE_SHARE_READ, NULL,
OPEN_EXISTING, overlapped, NULL);
@@ -293,6 +333,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
s->aio = aio;
}
+ raw_probe_alignment(bs);
ret = 0;
fail:
qemu_opts_del(opts);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 9921cd3..0a01b69 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -319,6 +319,9 @@ struct BlockDriverState {
/* Whether produces zeros when read beyond eof */
bool zero_beyond_eof;
+ /* Alignment requirement for offset/length of I/O requests */
+ unsigned int request_alignment;
+
/* the block size for which the guest device expects atomicity */
int guest_block_size;
--
1.8.1.4
next prev parent reply other threads:[~2013-12-11 21:09 UTC|newest]
Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-12-11 21:08 [Qemu-devel] [PATCH 00/22] block: Support for 512b-on-4k emulation Kevin Wolf
2013-12-11 21:08 ` [Qemu-devel] [PATCH 01/22] block: Move initialisation of BlockLimits to bdrv_refresh_limits() Kevin Wolf
2013-12-12 2:54 ` Wenchao Xia
2013-12-12 2:57 ` Wenchao Xia
2013-12-12 7:51 ` Thomas Huth
2013-12-12 9:32 ` Kevin Wolf
2013-12-12 6:17 ` Peter Lieven
2013-12-12 9:35 ` Kevin Wolf
2013-12-11 21:08 ` [Qemu-devel] [PATCH 02/22] block: Inherit opt_transfer_length Kevin Wolf
2013-12-12 3:02 ` Wenchao Xia
2013-12-11 21:08 ` [Qemu-devel] [PATCH 03/22] block: Update BlockLimits when they might have changed Kevin Wolf
2013-12-12 3:09 ` Wenchao Xia
2013-12-11 21:08 ` [Qemu-devel] [PATCH 04/22] qemu_memalign: Allow small alignments Kevin Wolf
2013-12-12 3:09 ` Wenchao Xia
2013-12-11 21:08 ` [Qemu-devel] [PATCH 05/22] block: Detect unaligned length in bdrv_qiov_is_aligned() Kevin Wolf
2013-12-12 3:15 ` Wenchao Xia
2013-12-11 21:08 ` [Qemu-devel] [PATCH 06/22] block: Don't use guest sector size for qemu_blockalign() Kevin Wolf
2013-12-12 6:32 ` Wenchao Xia
2013-12-11 21:08 ` [Qemu-devel] [PATCH 07/22] block: rename buffer_alignment to guest_block_size Kevin Wolf
2013-12-12 6:34 ` Wenchao Xia
2013-12-11 21:08 ` Kevin Wolf [this message]
2013-12-11 21:08 ` [Qemu-devel] [PATCH 09/22] block: Introduce bdrv_aligned_preadv() Kevin Wolf
2013-12-11 21:08 ` [Qemu-devel] [PATCH 10/22] block: Introduce bdrv_co_do_preadv() Kevin Wolf
2013-12-11 21:08 ` [Qemu-devel] [PATCH 11/22] block: Introduce bdrv_aligned_pwritev() Kevin Wolf
2013-12-11 21:08 ` [Qemu-devel] [PATCH 12/22] block: write: Handle COR dependency after I/O throttling Kevin Wolf
2013-12-11 21:08 ` [Qemu-devel] [PATCH 13/22] block: Introduce bdrv_co_do_pwritev() Kevin Wolf
2013-12-11 21:08 ` [Qemu-devel] [PATCH 14/22] block: Switch BdrvTrackedRequest to byte granularity Kevin Wolf
2013-12-11 21:08 ` [Qemu-devel] [PATCH 15/22] block: Allow waiting for overlapping requests between begin/end Kevin Wolf
2013-12-11 21:08 ` [Qemu-devel] [PATCH 16/22] block: Make zero-after-EOF work with larger alignment Kevin Wolf
2013-12-11 21:08 ` [Qemu-devel] [PATCH 17/22] block: Generalise and optimise COR serialisation Kevin Wolf
2013-12-11 21:08 ` [Qemu-devel] [PATCH 18/22] block: Make overlap range for serialisation dynamic Kevin Wolf
2013-12-11 21:08 ` [Qemu-devel] [PATCH 19/22] block: Align requests in bdrv_co_do_pwritev() Kevin Wolf
2013-12-11 21:08 ` [Qemu-devel] [PATCH 20/22] block: Change coroutine wrapper to byte granularity Kevin Wolf
2013-12-11 21:08 ` [Qemu-devel] [PATCH 21/22] block: Make bdrv_pread() a bdrv_prwv_co() wrapper Kevin Wolf
2013-12-11 21:08 ` [Qemu-devel] [PATCH 22/22] block: Make bdrv_pwrite() " Kevin Wolf
2013-12-12 6:09 ` [Qemu-devel] [PATCH 00/22] block: Support for 512b-on-4k emulation Peter Lieven
2013-12-12 9:47 ` Kevin Wolf
2013-12-12 10:26 ` Peter Lieven
2013-12-12 12:48 ` Peter Lieven
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1386796109-15264-9-git-send-email-kwolf@redhat.com \
--to=kwolf@redhat.com \
--cc=pbonzini@redhat.com \
--cc=pl@kamp.de \
--cc=qemu-devel@nongnu.org \
--cc=stefanha@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.