From: Christoph Hellwig <hch@lst.de>
To: qemu-devel@nongnu.org
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Subject: [Qemu-devel] [PATCH 2/4] block: add block topology options
Date: Fri, 29 Jan 2010 20:04:40 +0100 [thread overview]
Message-ID: <20100129190440.GA25287@lst.de> (raw)
In-Reply-To: <20100129190417.GA25237@lst.de>
Add three new suboptions for the drive option to export block topology
information to the guest. This is needed to get optimal I/O alignment
for RAID arrays or SSDs.
The options are:
- physical_block_size to specify the physical block size of the device,
this is going to increase from 512 bytes to 4096 kilobytes for many
modern storage devices
- min_io_size to specify the minimal I/O size without performance impact,
this is typically set to the RAID chunk size for arrays.
- opt_io_size to specify the optimal sustained I/O size, this is
typically the RAID stripe width for arrays.
I decided to not auto-probe these values from blkid which might easily
be possible as I don't know how to deal with these issues on migration.
Note that we specificly only set the physical_block_size, and not the
logial one which is the unit all I/O is described in. The reason for
that is that IDE does not support increasing the logical block size and
at last for now I want to stick to one meachnisms in queue and allow
for easy switching of transports for a given backing image which would
not be possible if scsi and virtio use real 4k sectors, while ide only
uses the physical block exponent.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Index: qemu/block.c
===================================================================
--- qemu.orig/block.c 2010-01-29 11:07:50.083004364 +0100
+++ qemu/block.c 2010-01-29 11:08:32.940004255 +0100
@@ -1028,6 +1028,51 @@ int bdrv_enable_write_cache(BlockDriverS
return bs->enable_write_cache;
}
+unsigned int bdrv_get_physical_block_size(BlockDriverState *bs)
+{
+ return bs->physical_block_size;
+}
+
+unsigned int bdrv_get_physical_block_exp(BlockDriverState *bs)
+{
+ unsigned int exp = 0, size;
+
+ for (size = bs->physical_block_size; size > 512; size >>= 1) {
+ exp++;
+ }
+
+ return exp;
+}
+
+void bdrv_set_physical_block_size(BlockDriverState *bs,
+ unsigned int physical_block_size)
+{
+ bs->physical_block_size = physical_block_size;
+}
+
+
+unsigned int bdrv_get_min_io_size(BlockDriverState *bs)
+{
+ return bs->min_io_size;
+}
+
+void bdrv_set_min_io_size(BlockDriverState *bs,
+ unsigned int min_io_size)
+{
+ bs->min_io_size = min_io_size;
+}
+
+unsigned int bdrv_get_opt_io_size(BlockDriverState *bs)
+{
+ return bs->opt_io_size;
+}
+
+void bdrv_set_opt_io_size(BlockDriverState *bs,
+ unsigned int opt_io_size)
+{
+ bs->opt_io_size = opt_io_size;
+}
+
/* XXX: no longer used */
void bdrv_set_change_cb(BlockDriverState *bs,
void (*change_cb)(void *opaque), void *opaque)
Index: qemu/block.h
===================================================================
--- qemu.orig/block.h 2010-01-29 11:07:50.089004011 +0100
+++ qemu/block.h 2010-01-29 11:08:32.940004255 +0100
@@ -152,6 +152,16 @@ int bdrv_is_inserted(BlockDriverState *b
int bdrv_media_changed(BlockDriverState *bs);
int bdrv_is_locked(BlockDriverState *bs);
void bdrv_set_locked(BlockDriverState *bs, int locked);
+unsigned int bdrv_get_physical_block_size(BlockDriverState *bs);
+unsigned int bdrv_get_physical_block_exp(BlockDriverState *bs);
+void bdrv_set_physical_block_size(BlockDriverState *bs,
+ unsigned int physical_block_size);
+unsigned int bdrv_get_min_io_size(BlockDriverState *bs);
+void bdrv_set_min_io_size(BlockDriverState *bs,
+ unsigned int min_io_size);
+unsigned int bdrv_get_opt_io_size(BlockDriverState *bs);
+void bdrv_set_opt_io_size(BlockDriverState *bs,
+ unsigned int opt_io_size);
int bdrv_eject(BlockDriverState *bs, int eject_flag);
void bdrv_set_change_cb(BlockDriverState *bs,
void (*change_cb)(void *opaque), void *opaque);
Index: qemu/block_int.h
===================================================================
--- qemu.orig/block_int.h 2010-01-29 11:07:50.096004065 +0100
+++ qemu/block_int.h 2010-01-29 11:08:32.941003474 +0100
@@ -173,6 +173,14 @@ struct BlockDriverState {
drivers. They are not used by the block driver */
int cyls, heads, secs, translation;
int type;
+
+ /*
+ * Topology information, all optional.
+ */
+ unsigned int physical_block_size;
+ unsigned int min_io_size;
+ unsigned int opt_io_size;
+
char device_name[32];
unsigned long *dirty_bitmap;
BlockDriverState *next;
Index: qemu/qemu-config.c
===================================================================
--- qemu.orig/qemu-config.c 2010-01-29 11:07:50.133004032 +0100
+++ qemu/qemu-config.c 2010-01-29 11:08:32.944025367 +0100
@@ -78,6 +78,15 @@ QemuOptsList qemu_drive_opts = {
},{
.name = "readonly",
.type = QEMU_OPT_BOOL,
+ },{
+ .name = "physical_block_size",
+ .type = QEMU_OPT_NUMBER,
+ },{
+ .name = "min_io_size",
+ .type = QEMU_OPT_NUMBER,
+ },{
+ .name = "opt_io_size",
+ .type = QEMU_OPT_NUMBER,
},
{ /* end if list */ }
},
Index: qemu/vl.c
===================================================================
--- qemu.orig/vl.c 2010-01-29 11:07:50.141004284 +0100
+++ qemu/vl.c 2010-01-29 11:08:32.947003820 +0100
@@ -1904,6 +1904,9 @@ DriveInfo *drive_init(QemuOpts *opts, vo
int index;
int cache;
int aio = 0;
+ unsigned long physical_block_size = 512;
+ unsigned long min_io_size = 0;
+ unsigned long opt_io_size = 0;
int ro = 0;
int bdrv_flags;
int on_read_error, on_write_error;
@@ -2053,6 +2056,32 @@ DriveInfo *drive_init(QemuOpts *opts, vo
}
#endif
+ if ((buf = qemu_opt_get(opts, "physical_block_size")) != NULL) {
+ physical_block_size = strtoul(buf, NULL, 10);
+ if (physical_block_size < 512) {
+ fprintf(stderr, "sector size must be larger than 512 bytes\n");
+ return NULL;
+ }
+ }
+
+ if ((buf = qemu_opt_get(opts, "min_io_size")) != NULL) {
+ min_io_size = strtoul(buf, NULL, 10);
+ if (!min_io_size || (min_io_size % physical_block_size)) {
+ fprintf(stderr,
+ "min_io_size must be a multiple of the sector size\n");
+ return NULL;
+ }
+ }
+
+ if ((buf = qemu_opt_get(opts, "opt_io_size")) != NULL) {
+ opt_io_size = strtoul(buf, NULL, 10);
+ if (!opt_io_size || (opt_io_size % min_io_size)) {
+ fprintf(stderr,
+ "opt_io_size must be a multiple of min_io_size\n");
+ return NULL;
+ }
+ }
+
if ((buf = qemu_opt_get(opts, "format")) != NULL) {
if (strcmp(buf, "?") == 0) {
fprintf(stderr, "qemu: Supported formats:");
@@ -2257,6 +2286,12 @@ DriveInfo *drive_init(QemuOpts *opts, vo
return NULL;
}
+ bdrv_set_physical_block_size(dinfo->bdrv, physical_block_size);
+ if (min_io_size)
+ bdrv_set_min_io_size(dinfo->bdrv, min_io_size);
+ if (opt_io_size)
+ bdrv_set_opt_io_size(dinfo->bdrv, opt_io_size);
+
if (bdrv_key_required(dinfo->bdrv))
autostart = 0;
*fatal_error = 0;
Index: qemu/qemu-options.hx
===================================================================
--- qemu.orig/qemu-options.hx 2010-01-29 11:07:50.149004395 +0100
+++ qemu/qemu-options.hx 2010-01-29 19:36:06.118256061 +0100
@@ -104,6 +104,7 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive,
" [,cyls=c,heads=h,secs=s[,trans=t]][,snapshot=on|off]\n"
" [,cache=writethrough|writeback|none][,format=f][,serial=s]\n"
" [,addr=A][,id=name][,aio=threads|native][,readonly=on|off]\n"
+ " [,physical_block=size=size][,min_io_size=size][,opt_io_size=size]\n"
" use 'file' as a drive image\n")
DEF("set", HAS_ARG, QEMU_OPTION_set,
"-set group.id.arg=value\n"
@@ -149,6 +150,15 @@ an untrusted format header.
This option specifies the serial number to assign to the device.
@item addr=@var{addr}
Specify the controller's PCI address (if=virtio only).
+@item physical_sector_size=@var{size}
+Report a physical block size larger than the logical block size of 512 bytes.
+@item min_io_size=@var{size}
+Reported a minimum I/O size or optimum I/O granularity. This is the smallest
+I/O size the device can perform without a performance penalty. For RAID
+devices this should be set to the stripe chunk size.
+@item opt_io_size=@var{size}
+Report an optimal I/O size, which is the device's preferred unit for
+sustained I/O. This should be set to the stripe width for RAID devices.
@end table
By default, writethrough caching is used for all block device. This means that
next prev parent reply other threads:[~2010-01-29 19:04 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-01-29 19:04 [Qemu-devel] [PATCH 1/4] virtio-blk: revert serial number support Christoph Hellwig
2010-01-29 19:04 ` Christoph Hellwig [this message]
2010-02-03 19:00 ` [Qemu-devel] [PATCH 2/4] block: add block topology options Anthony Liguori
2010-02-05 13:09 ` Christoph Hellwig
2010-02-05 16:16 ` Jamie Lokier
2010-02-05 16:22 ` Christoph Hellwig
2010-02-05 17:16 ` Jamie Lokier
2010-02-05 17:33 ` Anthony Liguori
2010-02-09 15:26 ` Markus Armbruster
2010-01-29 19:05 ` [Qemu-devel] [PATCH 3/5] scsi: add topology support Christoph Hellwig
2010-01-29 19:05 ` [Qemu-devel] [PATCH 4/5] ide: " Christoph Hellwig
2010-01-29 19:05 ` [Qemu-devel] [PATCH 5/5] virtio-blk: " Christoph Hellwig
2010-02-01 9:09 ` [Qemu-devel] [PATCH v2 " Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100129190440.GA25287@lst.de \
--to=hch@lst.de \
--cc=martin.petersen@oracle.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.