From: Kevin Wolf <kwolf@redhat.com>
To: qemu-devel@nongnu.org
Cc: kwolf@redhat.com, pbonzini@redhat.com, pl@kamp.de, stefanha@redhat.com
Subject: [Qemu-devel] [PATCH v2 08/24] raw: Probe required direct I/O alignment
Date: Fri, 13 Dec 2013 14:22:43 +0100 [thread overview]
Message-ID: <1386940979-3824-9-git-send-email-kwolf@redhat.com> (raw)
In-Reply-To: <1386940979-3824-1-git-send-email-kwolf@redhat.com>
From: Paolo Bonzini <pbonzini@redhat.com>
Add a bs->request_alignment field that contains the required
offset/length alignment for I/O requests and fill it in the raw block
drivers. Use ioctls if possible, else see what alignment it takes for
O_DIRECT to succeed.
While at it, also expose the memory alignment requirements, which may be
(and in practice are) different from the disk alignment requirements.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block.c | 3 ++
block/raw-posix.c | 102 ++++++++++++++++++++++++++++++++++++++--------
block/raw-win32.c | 41 +++++++++++++++++++
include/block/block_int.h | 3 ++
4 files changed, 132 insertions(+), 17 deletions(-)
diff --git a/block.c b/block.c
index 3504d17..b86e754 100644
--- a/block.c
+++ b/block.c
@@ -813,6 +813,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
bs->open_flags = flags;
bs->guest_block_size = 512;
+ bs->request_alignment = 512;
bs->zero_beyond_eof = true;
open_flags = bdrv_open_flags(bs, flags);
bs->read_only = !(open_flags & BDRV_O_RDWR);
@@ -881,6 +882,8 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
}
bdrv_refresh_limits(bs);
+ assert(bdrv_opt_mem_align(bs) != 0);
+ assert(bs->request_alignment != 0);
#ifndef _WIN32
if (bs->is_temporary) {
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 10c6b34..e8e75a7 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -127,6 +127,8 @@ typedef struct BDRVRawState {
int fd;
int type;
int open_flags;
+ size_t buf_align;
+
#if defined(__linux__)
/* linux floppy specific */
int64_t fd_open_time;
@@ -213,6 +215,76 @@ static int raw_normalize_devicepath(const char **filename)
}
#endif
+static void raw_probe_alignment(BlockDriverState *bs)
+{
+ BDRVRawState *s = bs->opaque;
+ char *buf;
+ unsigned int sector_size;
+
+ /* For /dev/sg devices the alignment is not really used.
+ With buffered I/O, we don't have any restrictions. */
+ if (bs->sg || !(s->open_flags & O_DIRECT)) {
+ bs->request_alignment = 1;
+ s->buf_align = 1;
+ return;
+ }
+
+ /* Try a few ioctls to get the right size */
+ bs->request_alignment = 0;
+ s->buf_align = 0;
+
+#ifdef BLKSSZGET
+ if (ioctl(s->fd, BLKSSZGET, §or_size) >= 0) {
+ bs->request_alignment = sector_size;
+ }
+#endif
+#ifdef DKIOCGETBLOCKSIZE
+ if (ioctl(s->fd, DKIOCGETBLOCKSIZE, §or_size) >= 0) {
+ bs->request_alignment = sector_size;
+ }
+#endif
+#ifdef DIOCGSECTORSIZE
+ if (ioctl(s->fd, DIOCGSECTORSIZE, §or_size) >= 0) {
+ bs->request_alignment = sector_size;
+ }
+#endif
+#ifdef CONFIG_XFS
+ if (s->is_xfs) {
+ struct dioattr da;
+ if (xfsctl(NULL, s->fd, XFS_IOC_DIOINFO, &da) >= 0) {
+ bs->request_alignment = da.d_miniosz;
+ /* The kernel returns wrong information for d_mem */
+ /* s->buf_align = da.d_mem; */
+ }
+ }
+#endif
+
+ /* If we could not get the sizes so far, we can only guess them */
+ if (!s->buf_align) {
+ size_t align;
+ buf = qemu_memalign(MAX_BLOCKSIZE, 2 * MAX_BLOCKSIZE);
+ for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
+ if (pread(s->fd, buf + align, MAX_BLOCKSIZE, 0) >= 0) {
+ s->buf_align = align;
+ break;
+ }
+ }
+ qemu_vfree(buf);
+ }
+
+ if (!bs->request_alignment) {
+ size_t align;
+ buf = qemu_memalign(s->buf_align, MAX_BLOCKSIZE);
+ for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
+ if (pread(s->fd, buf, align, 0) >= 0) {
+ bs->request_alignment = align;
+ break;
+ }
+ }
+ qemu_vfree(buf);
+ }
+}
+
static void raw_parse_flags(int bdrv_flags, int *open_flags)
{
assert(open_flags != NULL);
@@ -463,7 +535,6 @@ static int raw_reopen_prepare(BDRVReopenState *state,
return ret;
}
-
static void raw_reopen_commit(BDRVReopenState *state)
{
BDRVRawReopenState *raw_s = state->opaque;
@@ -499,23 +570,15 @@ static void raw_reopen_abort(BDRVReopenState *state)
state->opaque = NULL;
}
+static int raw_refresh_limits(BlockDriverState *bs)
+{
+ BDRVRawState *s = bs->opaque;
-/* XXX: use host sector size if necessary with:
-#ifdef DIOCGSECTORSIZE
- {
- unsigned int sectorsize = 512;
- if (!ioctl(fd, DIOCGSECTORSIZE, §orsize) &&
- sectorsize > bufsize)
- bufsize = sectorsize;
- }
-#endif
-#ifdef CONFIG_COCOA
- uint32_t blockSize = 512;
- if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) {
- bufsize = blockSize;
- }
-#endif
-*/
+ raw_probe_alignment(bs);
+ bs->bl.opt_mem_alignment = s->buf_align;
+
+ return 0;
+}
static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
{
@@ -1363,6 +1426,7 @@ static BlockDriver bdrv_file = {
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = raw_aio_discard,
+ .bdrv_refresh_limits = raw_refresh_limits,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -1740,6 +1804,7 @@ static BlockDriver bdrv_host_device = {
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = hdev_aio_discard,
+ .bdrv_refresh_limits = raw_refresh_limits,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -1871,6 +1936,7 @@ static BlockDriver bdrv_host_floppy = {
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
+ .bdrv_refresh_limits = raw_refresh_limits,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -1981,6 +2047,7 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
+ .bdrv_refresh_limits = raw_refresh_limits,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -2110,6 +2177,7 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
+ .bdrv_refresh_limits = raw_refresh_limits,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
diff --git a/block/raw-win32.c b/block/raw-win32.c
index 2bad5a3..6fe64d2 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -202,6 +202,35 @@ static int set_sparse(int fd)
NULL, 0, NULL, 0, &returned, NULL);
}
+static void raw_probe_alignment(BlockDriverState *bs)
+{
+ BDRVRawState *s = bs->opaque;
+ DWORD sectorsPerCluster, freeClusters, totalClusters, count;
+ DISK_GEOMETRY_EX dg;
+ BOOL status;
+
+ if (s->type == FTYPE_CD) {
+ bs->request_alignment = 2048;
+ return;
+ }
+ if (s->type == FTYPE_HARDDISK) {
+ status = DeviceIoControl(s->hfile, IOCTL_DISK_GET_DRIVE_GEOMETRY_EX,
+ NULL, 0, &dg, sizeof(dg), &count, NULL);
+ if (status != 0) {
+ bs->request_alignment = dg.Geometry.BytesPerSector;
+ return;
+ }
+ /* try GetDiskFreeSpace too */
+ }
+
+ if (s->drive_path[0]) {
+ GetDiskFreeSpace(s->drive_path, §orsPerCluster,
+ &dg.Geometry.BytesPerSector,
+ &freeClusters, &totalClusters);
+ bs->request_alignment = dg.Geometry.BytesPerSector;
+ }
+}
+
static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped)
{
assert(access_flags != NULL);
@@ -269,6 +298,17 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
}
}
+ if (filename[0] && filename[1] == ':') {
+ snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", filename[0]);
+ } else if (filename[0] == '\\' && filename[1] == '\\') {
+ s->drive_path[0] = 0;
+ } else {
+ /* Relative path. */
+ char buf[MAX_PATH];
+ GetCurrentDirectory(MAX_PATH, buf);
+ snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", buf[0]);
+ }
+
s->hfile = CreateFile(filename, access_flags,
FILE_SHARE_READ, NULL,
OPEN_EXISTING, overlapped, NULL);
@@ -293,6 +333,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
s->aio = aio;
}
+ raw_probe_alignment(bs);
ret = 0;
fail:
qemu_opts_del(opts);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 9921cd3..0a01b69 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -319,6 +319,9 @@ struct BlockDriverState {
/* Whether produces zeros when read beyond eof */
bool zero_beyond_eof;
+ /* Alignment requirement for offset/length of I/O requests */
+ unsigned int request_alignment;
+
/* the block size for which the guest device expects atomicity */
int guest_block_size;
--
1.8.1.4
next prev parent reply other threads:[~2013-12-13 13:23 UTC|newest]
Thread overview: 57+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-12-13 13:22 [Qemu-devel] [PATCH v2 00/24] block: Support for 512b-on-4k emulatio Kevin Wolf
2013-12-13 13:22 ` [Qemu-devel] [PATCH v2 01/24] block: Move initialisation of BlockLimits to bdrv_refresh_limits() Kevin Wolf
2014-01-11 22:24 ` Max Reitz
2013-12-13 13:22 ` [Qemu-devel] [PATCH v2 02/24] block: Inherit opt_transfer_length Kevin Wolf
2014-01-11 22:24 ` Max Reitz
2013-12-13 13:22 ` [Qemu-devel] [PATCH v2 03/24] block: Update BlockLimits when they might have changed Kevin Wolf
2014-01-11 22:24 ` Max Reitz
2013-12-13 13:22 ` [Qemu-devel] [PATCH v2 04/24] qemu_memalign: Allow small alignments Kevin Wolf
2014-01-11 22:25 ` Max Reitz
2013-12-13 13:22 ` [Qemu-devel] [PATCH v2 05/24] block: Detect unaligned length in bdrv_qiov_is_aligned() Kevin Wolf
2014-01-11 22:25 ` Max Reitz
2013-12-13 13:22 ` [Qemu-devel] [PATCH v2 06/24] block: Don't use guest sector size for qemu_blockalign() Kevin Wolf
2014-01-11 22:25 ` Max Reitz
2013-12-13 13:22 ` [Qemu-devel] [PATCH v2 07/24] block: rename buffer_alignment to guest_block_size Kevin Wolf
2014-01-11 22:26 ` Max Reitz
2013-12-13 13:22 ` Kevin Wolf [this message]
2013-12-24 3:39 ` [Qemu-devel] [PATCH v2 08/24] raw: Probe required direct I/O alignment Wenchao Xia
2014-01-11 22:26 ` Max Reitz
2013-12-13 13:22 ` [Qemu-devel] [PATCH v2 09/24] block: Introduce bdrv_aligned_preadv() Kevin Wolf
2013-12-24 7:23 ` Wenchao Xia
2014-01-11 22:27 ` Max Reitz
2013-12-13 13:22 ` [Qemu-devel] [PATCH v2 10/24] block: Introduce bdrv_co_do_preadv() Kevin Wolf
2013-12-26 4:08 ` Wenchao Xia
2013-12-13 13:22 ` [Qemu-devel] [PATCH v2 11/24] block: Introduce bdrv_aligned_pwritev() Kevin Wolf
2014-01-11 22:27 ` Max Reitz
2013-12-13 13:22 ` [Qemu-devel] [PATCH v2 12/24] block: write: Handle COR dependency after I/O throttling Kevin Wolf
2014-01-11 22:27 ` Max Reitz
2013-12-13 13:22 ` [Qemu-devel] [PATCH v2 13/24] block: Introduce bdrv_co_do_pwritev() Kevin Wolf
2014-01-10 18:11 ` Max Reitz
2014-01-16 12:25 ` Kevin Wolf
2014-01-17 1:38 ` Fam Zheng
2013-12-13 13:22 ` [Qemu-devel] [PATCH v2 14/24] block: Switch BdrvTrackedRequest to byte granularity Kevin Wolf
2014-01-10 18:34 ` Max Reitz
2013-12-13 13:22 ` [Qemu-devel] [PATCH v2 15/24] block: Allow waiting for overlapping requests between begin/end Kevin Wolf
2014-01-11 22:28 ` Max Reitz
2013-12-13 13:22 ` [Qemu-devel] [PATCH v2 16/24] block: Make zero-after-EOF work with larger alignment Kevin Wolf
2014-01-10 19:07 ` Max Reitz
2013-12-13 13:22 ` [Qemu-devel] [PATCH v2 17/24] block: Generalise and optimise COR serialisation Kevin Wolf
2014-01-11 22:28 ` Max Reitz
2013-12-13 13:22 ` [Qemu-devel] [PATCH v2 18/24] block: Make overlap range for serialisation dynamic Kevin Wolf
2014-01-11 22:28 ` Max Reitz
2013-12-13 13:22 ` [Qemu-devel] [PATCH v2 19/24] block: Allow wait_serialising_requests() at any point Kevin Wolf
2013-12-27 4:17 ` Wenchao Xia
2014-01-13 11:29 ` Kevin Wolf
2014-01-14 3:13 ` Wenchao Xia
2014-01-11 22:29 ` Max Reitz
2013-12-13 13:22 ` [Qemu-devel] [PATCH v2 20/24] block: Align requests in bdrv_co_do_pwritev() Kevin Wolf
2014-01-11 22:29 ` Max Reitz
2013-12-13 13:22 ` [Qemu-devel] [PATCH v2 21/24] block: Change coroutine wrapper to byte granularity Kevin Wolf
2014-01-11 22:29 ` Max Reitz
2013-12-13 13:22 ` [Qemu-devel] [PATCH v2 22/24] block: Make bdrv_pread() a bdrv_prwv_co() wrapper Kevin Wolf
2014-01-11 22:29 ` Max Reitz
2013-12-13 13:22 ` [Qemu-devel] [PATCH v2 23/24] block: Make bdrv_pwrite() " Kevin Wolf
2014-01-11 22:22 ` Max Reitz
2013-12-13 13:22 ` [Qemu-devel] [PATCH v2 24/24] iscsi: Set bs->request_alignment Kevin Wolf
2014-01-11 22:30 ` Max Reitz
2013-12-27 4:27 ` [Qemu-devel] [PATCH v2 00/24] block: Support for 512b-on-4k emulatio Wenchao Xia
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1386940979-3824-9-git-send-email-kwolf@redhat.com \
--to=kwolf@redhat.com \
--cc=pbonzini@redhat.com \
--cc=pl@kamp.de \
--cc=qemu-devel@nongnu.org \
--cc=stefanha@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.