From: Christoph Hellwig <hch@lst.de>
To: qemu-devel@nongnu.org
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Subject: [Qemu-devel] [PATCH 2/4] block: add block topology options
Date: Fri, 29 Jan 2010 20:04:40 +0100 [thread overview]
Message-ID: <20100129190440.GA25287@lst.de> (raw)
In-Reply-To: <20100129190417.GA25237@lst.de>
Add three new suboptions for the drive option to export block topology
information to the guest. This is needed to get optimal I/O alignment
for RAID arrays or SSDs.
The options are:
- physical_block_size to specify the physical block size of the device,
this is going to increase from 512 bytes to 4096 kilobytes for many
modern storage devices
- min_io_size to specify the minimal I/O size without performance impact,
this is typically set to the RAID chunk size for arrays.
- opt_io_size to specify the optimal sustained I/O size, this is
typically the RAID stripe width for arrays.
I decided to not auto-probe these values from blkid which might easily
be possible as I don't know how to deal with these issues on migration.
Note that we specificly only set the physical_block_size, and not the
logial one which is the unit all I/O is described in. The reason for
that is that IDE does not support increasing the logical block size and
at last for now I want to stick to one meachnisms in queue and allow
for easy switching of transports for a given backing image which would
not be possible if scsi and virtio use real 4k sectors, while ide only
uses the physical block exponent.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Index: qemu/block.c
===================================================================
--- qemu.orig/block.c 2010-01-29 11:07:50.083004364 +0100
+++ qemu/block.c 2010-01-29 11:08:32.940004255 +0100
@@ -1028,6 +1028,51 @@ int bdrv_enable_write_cache(BlockDriverS
return bs->enable_write_cache;
}
+unsigned int bdrv_get_physical_block_size(BlockDriverState *bs)
+{
+ return bs->physical_block_size;
+}
+
+unsigned int bdrv_get_physical_block_exp(BlockDriverState *bs)
+{
+ unsigned int exp = 0, size;
+
+ for (size = bs->physical_block_size; size > 512; size >>= 1) {
+ exp++;
+ }
+
+ return exp;
+}
+
+void bdrv_set_physical_block_size(BlockDriverState *bs,
+ unsigned int physical_block_size)
+{
+ bs->physical_block_size = physical_block_size;
+}
+
+
+unsigned int bdrv_get_min_io_size(BlockDriverState *bs)
+{
+ return bs->min_io_size;
+}
+
+void bdrv_set_min_io_size(BlockDriverState *bs,
+ unsigned int min_io_size)
+{
+ bs->min_io_size = min_io_size;
+}
+
+unsigned int bdrv_get_opt_io_size(BlockDriverState *bs)
+{
+ return bs->opt_io_size;
+}
+
+void bdrv_set_opt_io_size(BlockDriverState *bs,
+ unsigned int opt_io_size)
+{
+ bs->opt_io_size = opt_io_size;
+}
+
/* XXX: no longer used */
void bdrv_set_change_cb(BlockDriverState *bs,
void (*change_cb)(void *opaque), void *opaque)
Index: qemu/block.h
===================================================================
--- qemu.orig/block.h 2010-01-29 11:07:50.089004011 +0100
+++ qemu/block.h 2010-01-29 11:08:32.940004255 +0100
@@ -152,6 +152,16 @@ int bdrv_is_inserted(BlockDriverState *b
int bdrv_media_changed(BlockDriverState *bs);
int bdrv_is_locked(BlockDriverState *bs);
void bdrv_set_locked(BlockDriverState *bs, int locked);
+unsigned int bdrv_get_physical_block_size(BlockDriverState *bs);
+unsigned int bdrv_get_physical_block_exp(BlockDriverState *bs);
+void bdrv_set_physical_block_size(BlockDriverState *bs,
+ unsigned int physical_block_size);
+unsigned int bdrv_get_min_io_size(BlockDriverState *bs);
+void bdrv_set_min_io_size(BlockDriverState *bs,
+ unsigned int min_io_size);
+unsigned int bdrv_get_opt_io_size(BlockDriverState *bs);
+void bdrv_set_opt_io_size(BlockDriverState *bs,
+ unsigned int opt_io_size);
int bdrv_eject(BlockDriverState *bs, int eject_flag);
void bdrv_set_change_cb(BlockDriverState *bs,
void (*change_cb)(void *opaque), void *opaque);
Index: qemu/block_int.h
===================================================================
--- qemu.orig/block_int.h 2010-01-29 11:07:50.096004065 +0100
+++ qemu/block_int.h 2010-01-29 11:08:32.941003474 +0100
@@ -173,6 +173,14 @@ struct BlockDriverState {
drivers. They are not used by the block driver */
int cyls, heads, secs, translation;
int type;
+
+ /*
+ * Topology information, all optional.
+ */
+ unsigned int physical_block_size;
+ unsigned int min_io_size;
+ unsigned int opt_io_size;
+
char device_name[32];
unsigned long *dirty_bitmap;
BlockDriverState *next;
Index: qemu/qemu-config.c
===================================================================
--- qemu.orig/qemu-config.c 2010-01-29 11:07:50.133004032 +0100
+++ qemu/qemu-config.c 2010-01-29 11:08:32.944025367 +0100
@@ -78,6 +78,15 @@ QemuOptsList qemu_drive_opts = {
},{
.name = "readonly",
.type = QEMU_OPT_BOOL,
+ },{
+ .name = "physical_block_size",
+ .type = QEMU_OPT_NUMBER,
+ },{
+ .name = "min_io_size",
+ .type = QEMU_OPT_NUMBER,
+ },{
+ .name = "opt_io_size",
+ .type = QEMU_OPT_NUMBER,
},
{ /* end if list */ }
},
Index: qemu/vl.c
===================================================================
--- qemu.orig/vl.c 2010-01-29 11:07:50.141004284 +0100
+++ qemu/vl.c 2010-01-29 11:08:32.947003820 +0100
@@ -1904,6 +1904,9 @@ DriveInfo *drive_init(QemuOpts *opts, vo
int index;
int cache;
int aio = 0;
+ unsigned long physical_block_size = 512;
+ unsigned long min_io_size = 0;
+ unsigned long opt_io_size = 0;
int ro = 0;
int bdrv_flags;
int on_read_error, on_write_error;
@@ -2053,6 +2056,32 @@ DriveInfo *drive_init(QemuOpts *opts, vo
}
#endif
+ if ((buf = qemu_opt_get(opts, "physical_block_size")) != NULL) {
+ physical_block_size = strtoul(buf, NULL, 10);
+ if (physical_block_size < 512) {
+ fprintf(stderr, "sector size must be larger than 512 bytes\n");
+ return NULL;
+ }
+ }
+
+ if ((buf = qemu_opt_get(opts, "min_io_size")) != NULL) {
+ min_io_size = strtoul(buf, NULL, 10);
+ if (!min_io_size || (min_io_size % physical_block_size)) {
+ fprintf(stderr,
+ "min_io_size must be a multiple of the sector size\n");
+ return NULL;
+ }
+ }
+
+ if ((buf = qemu_opt_get(opts, "opt_io_size")) != NULL) {
+ opt_io_size = strtoul(buf, NULL, 10);
+ if (!opt_io_size || (opt_io_size % min_io_size)) {
+ fprintf(stderr,
+ "opt_io_size must be a multiple of min_io_size\n");
+ return NULL;
+ }
+ }
+
if ((buf = qemu_opt_get(opts, "format")) != NULL) {
if (strcmp(buf, "?") == 0) {
fprintf(stderr, "qemu: Supported formats:");
@@ -2257,6 +2286,12 @@ DriveInfo *drive_init(QemuOpts *opts, vo
return NULL;
}
+ bdrv_set_physical_block_size(dinfo->bdrv, physical_block_size);
+ if (min_io_size)
+ bdrv_set_min_io_size(dinfo->bdrv, min_io_size);
+ if (opt_io_size)
+ bdrv_set_opt_io_size(dinfo->bdrv, opt_io_size);
+
if (bdrv_key_required(dinfo->bdrv))
autostart = 0;
*fatal_error = 0;
Index: qemu/qemu-options.hx
===================================================================
--- qemu.orig/qemu-options.hx 2010-01-29 11:07:50.149004395 +0100
+++ qemu/qemu-options.hx 2010-01-29 19:36:06.118256061 +0100
@@ -104,6 +104,7 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive,
" [,cyls=c,heads=h,secs=s[,trans=t]][,snapshot=on|off]\n"
" [,cache=writethrough|writeback|none][,format=f][,serial=s]\n"
" [,addr=A][,id=name][,aio=threads|native][,readonly=on|off]\n"
+ " [,physical_block=size=size][,min_io_size=size][,opt_io_size=size]\n"
" use 'file' as a drive image\n")
DEF("set", HAS_ARG, QEMU_OPTION_set,
"-set group.id.arg=value\n"
@@ -149,6 +150,15 @@ an untrusted format header.
This option specifies the serial number to assign to the device.
@item addr=@var{addr}
Specify the controller's PCI address (if=virtio only).
+@item physical_sector_size=@var{size}
+Report a physical block size larger than the logical block size of 512 bytes.
+@item min_io_size=@var{size}
+Reported a minimum I/O size or optimum I/O granularity. This is the smallest
+I/O size the device can perform without a performance penalty. For RAID
+devices this should be set to the stripe chunk size.
+@item opt_io_size=@var{size}
+Report an optimal I/O size, which is the device's preferred unit for
+sustained I/O. This should be set to the stripe width for RAID devices.
@end table
By default, writethrough caching is used for all block device. This means that
next prev parent reply other threads:[~2010-01-29 19:04 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-01-29 19:04 [Qemu-devel] [PATCH 1/4] virtio-blk: revert serial number support Christoph Hellwig
2010-01-29 19:04 ` Christoph Hellwig [this message]
2010-02-03 19:00 ` [Qemu-devel] [PATCH 2/4] block: add block topology options Anthony Liguori
2010-02-05 13:09 ` Christoph Hellwig
2010-02-05 16:16 ` Jamie Lokier
2010-02-05 16:22 ` Christoph Hellwig
2010-02-05 17:16 ` Jamie Lokier
2010-02-05 17:33 ` Anthony Liguori
2010-02-09 15:26 ` Markus Armbruster
2010-01-29 19:05 ` [Qemu-devel] [PATCH 3/5] scsi: add topology support Christoph Hellwig
2010-01-29 19:05 ` [Qemu-devel] [PATCH 4/5] ide: " Christoph Hellwig
2010-01-29 19:05 ` [Qemu-devel] [PATCH 5/5] virtio-blk: " Christoph Hellwig
2010-02-01 9:09 ` [Qemu-devel] [PATCH v2 " Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100129190440.GA25287@lst.de \
--to=hch@lst.de \
--cc=martin.petersen@oracle.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).