From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:60117) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1gaHs4-000242-EN for qemu-devel@nongnu.org; Fri, 21 Dec 2018 05:17:34 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1gaHs2-0004a6-Iu for qemu-devel@nongnu.org; Fri, 21 Dec 2018 05:17:32 -0500 From: Yaowei Bai Date: Fri, 21 Dec 2018 18:16:27 +0800 Message-Id: <1545387387-9613-1-git-send-email-baiyaowei@cmss.chinamobile.com> Subject: [Qemu-devel] [PATCH] tcmu: Introduce qemu-tcmu utility List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-block@nongnu.org, qemu-devel@nongnu.org Cc: Yaowei Bai , Mike Christie , Amar Tumballi , Prasanna Kalever , Paolo Bonzini , Fam Zheng , Xiubo Li This patch introduces a new utility, qemu-tcmu. Apart from the underlying protocol, it interacts with the world much like qemu-nbd. This patch is based on Fam's version. Qemu-tcmu handles SCSI commands which are passed through to userspace from the kernel by the LIO subsystem using the TCMU protocol. Libtcmu is the library for processing the TCMU protocol in userspace. With qemu-tcmu, we can export images/formats like qcow2, rbd, etc. that QEMU supports, using the iSCSI protocol or loopback for remote or local access. Currently qemu-tcmu implements several SCSI command helper functions of its own in order to work. Our goal is to refactor and reuse the SCSI code in scsi-disk. Please refer to docs/tcmu.txt to use qemu-tcmu. We tested it on CentOS 7.3. (Please use kernel version 3.10.0-514 or lower; there is one issue with higher kernel versions that we are still resolving.) 
Cc: Mike Christie Cc: Amar Tumballi Cc: Prasanna Kalever Cc: Paolo Bonzini Signed-off-by: Fam Zheng Signed-off-by: Yaowei Bai Signed-off-by: Xiubo Li --- Makefile | 1 + Makefile.objs | 3 +- configure | 45 ++++ docs/tcmu.txt | 91 +++++++ include/tcmu/tcmu.h | 14 + qemu-tcmu.c | 214 +++++++++++++++ tcmu/Makefile.objs | 5 + tcmu/helper.c | 741 ++++++++++++++++++++++++++++++++++++++++++++++++++++ tcmu/helper.h | 31 +++ tcmu/tcmu.c | 598 ++++++++++++++++++++++++++++++++++++++++++ tcmu/trace-events | 12 + 11 files changed, 1754 insertions(+), 1 deletion(-) create mode 100644 docs/tcmu.txt create mode 100644 include/tcmu/tcmu.h create mode 100644 qemu-tcmu.c create mode 100644 tcmu/Makefile.objs create mode 100644 tcmu/helper.c create mode 100644 tcmu/helper.h create mode 100644 tcmu/tcmu.c create mode 100644 tcmu/trace-events diff --git a/Makefile b/Makefile index 038780c..351e9d4 100644 --- a/Makefile +++ b/Makefile @@ -483,6 +483,7 @@ qemu-img.o: qemu-img-cmds.h qemu-img$(EXESUF): qemu-img.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS) qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS) qemu-io$(EXESUF): qemu-io.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS) +qemu-tcmu$(EXESUF): qemu-tcmu.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS) qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o $(COMMON_LDADDS) diff --git a/Makefile.objs b/Makefile.objs index 56af034..8f96c42 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -26,7 +26,7 @@ block-obj-y += block/ scsi/ block-obj-y += qemu-io-cmds.o block-obj-$(CONFIG_REPLICATION) += replication.o -block-obj-m = block/ +block-obj-m = block/ tcmu/ ####################################################################### # crypto-obj-y is code used by both qemu system emulation and qemu-img @@ -196,6 +196,7 @@ trace-events-subdirs += target/mips trace-events-subdirs += target/ppc trace-events-subdirs += 
target/s390x trace-events-subdirs += target/sparc +trace-events-subdirs += tcmu trace-events-subdirs += ui trace-events-subdirs += util diff --git a/configure b/configure index 224d307..d41e4e9 100755 --- a/configure +++ b/configure @@ -346,6 +346,7 @@ fdt="" netmap="no" sdl="" sdlabi="" +tcmu="" virtfs="" mpath="" vnc="yes" @@ -1034,6 +1035,10 @@ for opt do # configure to be used by RPM and similar macros that set # lots of directory switches by default. ;; + --enable-tcmu) tcmu="yes" + ;; + --disable-tcmu) tcmu="no" + ;; --disable-sdl) sdl="no" ;; --enable-sdl) sdl="yes" @@ -3607,6 +3612,36 @@ else fi ########################################## +# tcmu support probe + +if test "$tcmu" != "no"; then + # Sanity check for gio-unix-2.0 (part of glib2), cannot fail unless something + # is very wrong. + if ! $pkg_config gio-unix-2.0; then + error_exit "glib is required to compile QEMU" + fi + cat > $TMPC < +#include + +int main(int argc, char **argv) +{ + struct tcmulib_context *ctx = tcmulib_initialize(NULL, 0); + tcmulib_register(ctx); + return ctx != NULL; +} +EOF + if compile_prog "" "-ltcmu" ; then + tcmu=yes + tcmu_libs="-ltcmu" + elif test "$tcmu" == "yes"; then + feature_not_found "libtcmu" "Install libtcmu devel (>=1.0.5)" + else + tcmu=no + fi +fi + +########################################## # libmpathpersist probe if test "$mpath" != "no" ; then @@ -5756,6 +5791,9 @@ if test "$want_tools" = "yes" ; then if [ "$posix" = "yes" ] && [ "$curl" = "yes" ]; then tools="elf2dmp $tools" fi + if [ "$linux" = "yes" -a "$tcmu" = "yes" ] ; then + tools="qemu-tcmu\$(EXESUF) $tools" + fi fi if test "$softmmu" = yes ; then if test "$linux" = yes; then @@ -6142,6 +6180,7 @@ echo "capstone $capstone" echo "docker $docker" echo "libpmem support $libpmem" echo "libudev $libudev" +echo "tcmu support $tcmu" if test "$sdl_too_old" = "yes"; then echo "-> Your SDL version is too old - please upgrade to have SDL support" @@ -6782,6 +6821,12 @@ if test "$live_block_migration" = "yes" 
; then echo "CONFIG_LIVE_BLOCK_MIGRATION=y" >> $config_host_mak fi +if test "$tcmu" = "yes" ; then + echo "CONFIG_TCMU=m" >> $config_host_mak + echo "TCMU_CFLAGS=$tcmu_cflags" >> $config_host_mak + echo "TCMU_LIBS=$tcmu_libs" >> $config_host_mak +fi + if test "$tpm" = "yes"; then echo 'CONFIG_TPM=$(CONFIG_SOFTMMU)' >> $config_host_mak # TPM passthrough support? diff --git a/docs/tcmu.txt b/docs/tcmu.txt new file mode 100644 index 0000000..ffe5f85 --- /dev/null +++ b/docs/tcmu.txt @@ -0,0 +1,91 @@ +Introduction +------------------------- +TCMU is the abbreviation of TCM in Userspace and TCM is another +name for LIO, an ISCSI target in Linux kernel. TCM can serve +file, block device, RAM, etc as storage backend for ISCSI target +totally in kernel. But for userspace storage like Glusterfs and +Ceph, it's hard for TCM to handle as backend storage. TCMU is used +in this situation by utilizing UIO ring buffer to passthrough +userspace so a userspace program can process SCSI command by handling +TCMU protocol. Qemu-tcmu is such userspace program which can export +any format/protocol that QEMU supports as ISCSI target or loopback +by linking to libtcmu in tcmu-runner(a userspace helper daemon to +handle TCMU interfaces). + +Installation +-------------------- +Qemu-tcmu depends on libtcmu/tcmu-runner to handle TCMU userspace +interfaces and targetcli-fb and other utilities to manage ISCSI +targets. + +1. install and config tcmu-runner + + # git clone https://github.com/open-iscsi/tcmu-runner + # cd tcmu-runner + # cmake -DSUPPORT_SYSTEMD=ON -DCMAKE_INSTALL_PREFIX=/usr + # make install + # systemctl daemon-reload + # systemctl enable tcmu-runner + # systemctl start tcmu-runner + +2. install rtslib-fb + + # git clone https://github.com/open-iscsi/rtslib-fb.git + # cd rtslib-fb + # python setup.py install + +3. install configshell-fb + + # git clone https://github.com/open-iscsi/configshell-fb.git + # cd configshell-fb + # python setup.py install + +4. 
install targetcli-fb + + # git clone https://github.com/open-iscsi/targetcli-fb.git + # cd targetcli-fb + # python setup.py install + +5. install qemu-tcmu + + # git clone https://github.com/qemu/qemu.git + # cd qemu + # ./configure --target-list=x86_64-softmmu \ + --enable-libiscsi \ + --enable-tcmu + # make -j + # make -j install + +Now we can use qemu-tcmu to export images. + +1. create backend storage file + + # qemu-img create test.file 1G + +2. load TCMU kernel module + + # modprobe target_core_user + +3. start qemu-tcmu + + # qemu-tcmu + +4. configure ISCSI target via targetcli + + # IQN=iqn.2016-11.org.test:qemu-tcmu-test + # targetcli /backstores/user:qemu create qemulun 1G @id=test@file=/root/test.file + # targetcli /iscsi create $IQN + # targetcli /iscsi/$IQN/tpg1 set attribute \ + authentication=0 \ + generate_node_acls=1 \ + demo_mode_write_protect=0 \ + prod_mode_write_protect=0 + # targetcli /iscsi/$IQN/tpg1/luns create /backstores/user:qemu/qemulun + +Then you can connect to this exported target from another initiator host. + +Others +------ +For more information about TCMU and tcmu-runner, please refer to +Documentation/target/tcmu-design.txt in the Linux kernel and +https://github.com/open-iscsi/tcmu-runner. diff --git a/include/tcmu/tcmu.h b/include/tcmu/tcmu.h new file mode 100644 index 0000000..656a545 --- /dev/null +++ b/include/tcmu/tcmu.h @@ -0,0 +1,14 @@ +#ifndef QEMU_TCMU_H +#define QEMU_TCMU_H + +#include "qemu-common.h" + +typedef struct TCMUExport TCMUExport; +extern QemuOptsList qemu_tcmu_export_opts; + +void qemu_tcmu_stop(void); +void qemu_tcmu_start(const char *subtype, Error **errp); +TCMUExport *tcmu_export_new(BlockBackend *blk, bool writable, Error **errp); +int export_init_func(void *opaque, QemuOpts *all_opts, Error **errp); + +#endif diff --git a/qemu-tcmu.c b/qemu-tcmu.c new file mode 100644 index 0000000..85e348f --- /dev/null +++ b/qemu-tcmu.c @@ -0,0 +1,214 @@ +/* + * Copyright 2016 Red Hat, Inc. 
+ * + * TCMU Handler Program + * + * Authors: + * Fam Zheng + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; under version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/cutils.h" +#include "sysemu/block-backend.h" +#include "block/block_int.h" +#include "qemu/main-loop.h" +#include "qemu/error-report.h" +#include "qemu/config-file.h" +#include "qemu/bswap.h" +#include "qemu/log.h" +#include "qemu/option.h" +#include "block/snapshot.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qstring.h" +#include "qom/object_interfaces.h" +#include "crypto/init.h" +#include "trace/control.h" +#include "tcmu/tcmu.h" +#include +#include "qemu-version.h" + +#define QEMU_TCMU_OPT_OBJECT 260 + +static int verbose; +static enum { RUNNING, TERMINATING, TERMINATED } state; + +static void usage(const char *name) +{ + (printf) ( +"Usage:\n" +"%s [OPTIONS]\n" +"QEMU TCMU Handler\n" +"\n" +" -h, --help display this help and exit\n" +" -V, --version output version information and exit\n" +"\n" +"General purpose options:\n" +" -v, --verbose display extra debugging information\n" +" -x, --handler-name=NAME handler name to be used as the subtype for TCMU\n" +" --object type,id=ID,... 
define an object such as 'secret' for providing\n" +" passwords and/or encryption keys\n" +" -T, --trace [[enable=]][,events=][,file=]\n" +" specify tracing options\n" +"\n" +"Report bugs to \n" + , name); +} + +static void version(const char *name) +{ + printf("%s v" QEMU_FULL_VERSION "\n", name); +} + +static void termsig_handler(int signum) +{ + atomic_cmpxchg(&state, RUNNING, TERMINATING); + qemu_notify_event(); +} + +static QemuOptsList qemu_object_opts = { + .name = "object", + .implied_opt_name = "qom-type", + .head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head), + .desc = { + { } + }, +}; + +static void qemu_tcmu_shutdown(void) +{ + job_cancel_sync_all(); + bdrv_close_all(); +} + +int main(int argc, char **argv) +{ + const char *sopt = "hVvx:T:"; + bool starting = true; + struct option lopt[] = { + { "help", no_argument, NULL, 'h' }, + { "version", no_argument, NULL, 'V' }, + { "verbose", no_argument, NULL, 'v' }, + { "object", required_argument, NULL, QEMU_TCMU_OPT_OBJECT }, + { "handler-name", required_argument, NULL, 'x' }, + { "trace", required_argument, NULL, 'T' }, + { NULL, 0, NULL, 0 } + }; + int ch; + int opt_ind = 0; + Error *local_err = NULL; + char *trace_file = NULL; + const char *subtype = "qemu"; + + struct sigaction sa_sigterm; + memset(&sa_sigterm, 0, sizeof(sa_sigterm)); + sa_sigterm.sa_handler = termsig_handler; + sigaction(SIGTERM, &sa_sigterm, NULL); + sigaction(SIGINT, &sa_sigterm, NULL); + + module_call_init(MODULE_INIT_TRACE); + qcrypto_init(&error_fatal); + + module_call_init(MODULE_INIT_QOM); + qemu_add_opts(&qemu_object_opts); + qemu_add_opts(&qemu_trace_opts); + qemu_init_exec_dir(argv[0]); + + while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) { + switch (ch) { + case 'x': + subtype = optarg; + break; + case 'v': + verbose = 1; + break; + case 'V': + version(argv[0]); + exit(0); + break; + case 'h': + usage(argv[0]); + exit(0); + break; + case '?': + error_report("Try `%s --help' for more information.", 
argv[0]); + exit(EXIT_FAILURE); + case QEMU_TCMU_OPT_OBJECT: { + QemuOpts *opts; + opts = qemu_opts_parse_noisily(&qemu_object_opts, + optarg, true); + if (!opts) { + exit(EXIT_FAILURE); + } + } break; + case 'T': + g_free(trace_file); + trace_file = trace_opt_parse(optarg); + break; + } + } + + if ((argc - optind) != 0) { + error_report("Invalid number of arguments"); + error_printf("Try `%s --help' for more information.\n", argv[0]); + exit(EXIT_FAILURE); + } + + if (qemu_opts_foreach(&qemu_object_opts, + user_creatable_add_opts_foreach, + NULL, NULL)) { + exit(EXIT_FAILURE); + } + + if (!trace_init_backends()) { + exit(1); + } + trace_init_file(trace_file); + qemu_set_log(LOG_TRACE); + + if (qemu_init_main_loop(&local_err)) { + error_report_err(local_err); + exit(EXIT_FAILURE); + } + bdrv_init(); + atexit(qemu_tcmu_shutdown); + + /* now when the initialization is (almost) complete, chdir("/") + * to free any busy filesystems */ + if (chdir("/") < 0) { + error_report("Could not chdir to root directory: %s", + strerror(errno)); + exit(EXIT_FAILURE); + } + + state = RUNNING; + do { + main_loop_wait(starting); + if (starting) { + qemu_tcmu_start(subtype, &local_err); + if (local_err) { + error_report_err(local_err); + exit(EXIT_FAILURE); + } + starting = false; + } + if (state == TERMINATING) { + state = TERMINATED; + qemu_tcmu_stop(); + } + } while (state != TERMINATED); + + exit(EXIT_SUCCESS); +} diff --git a/tcmu/Makefile.objs b/tcmu/Makefile.objs new file mode 100644 index 0000000..9ffa5b9 --- /dev/null +++ b/tcmu/Makefile.objs @@ -0,0 +1,5 @@ +block-obj-$(CONFIG_TCMU) += tcmu.mo + +tcmu.mo-objs := tcmu.o helper.o +tcmu.mo-cflags := $(TCMU_CFLAGS) +tcmu.mo-libs := $(TCMU_LIBS) diff --git a/tcmu/helper.c b/tcmu/helper.c new file mode 100644 index 0000000..0b86b4d --- /dev/null +++ b/tcmu/helper.c @@ -0,0 +1,741 @@ +/* + * Copyright (c) 2014 Red Hat, Inc. 
+ * + * This file is licensed to you under your choice of the GNU Lesser + * General Public License, version 2.1 or any later version (LGPLv2.1 or + * later), or the Apache License 2.0. + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "scsi/constants.h" +#include "libtcmu.h" +#include "helper.h" + +static int tcmu_emulate_std_inquiry( + uint8_t *cdb, + struct iovec *iovec, + size_t iov_cnt) +{ + uint8_t buf[36]; + + memset(buf, 0, sizeof(buf)); + + buf[2] = 0x05; /* SPC-3 */ + buf[3] = 0x02; /* response data format */ + + /* + * A Third-Party Copy (3PC) + * + * Enable the XCOPY + */ + buf[5] = 0x08; + + buf[7] = 0x02; /* CmdQue */ + + memcpy(&buf[8], "LIO-ORG ", 8); + memset(&buf[16], 0x20, 16); + memcpy(&buf[16], "TCMU device", 11); + memcpy(&buf[32], "0002", 4); + buf[4] = 31; /* Set additional length to 31 */ + + tcmu_memcpy_into_iovec(iovec, iov_cnt, buf, sizeof(buf)); + return TCMU_STS_OK; +} + +/* This func from CCAN str/hex/hex.c. Public Domain */ +static bool char_to_hex(unsigned char *val, char c) +{ + if (c >= '0' && c <= '9') { + *val = c - '0'; + return true; + } + if (c >= 'a' && c <= 'f') { + *val = c - 'a' + 10; + return true; + } + if (c >= 'A' && c <= 'F') { + *val = c - 'A' + 10; + return true; + } + return false; +} + +static int tcmu_emulate_evpd_inquiry( + struct tcmu_device *dev, + uint8_t *cdb, + struct iovec *iovec, + size_t iov_cnt) +{ + switch (cdb[2]) { + case 0x0: /* Supported VPD pages */ + { + char data[16]; + + memset(data, 0, sizeof(data)); + + /* data[1] (page code) already 0 */ + /* + * spc4r22 7.7.13 The supported VPD page list shall contain + * a list of all VPD page codes (see 7.7) implemented by the + * logical unit in ascending order beginning with page code 00h + */ + data[4] = 0x00; + data[5] = 0x80; + data[6] = 0x83; + data[7] = 0xb0; + data[8] = 0xb1; + data[9] = 0xb2; + + data[3] = 6; + + tcmu_memcpy_into_iovec(iovec, iov_cnt, data, sizeof(data)); + return TCMU_STS_OK; + } + break; + case 0x80: /* 
Unit Serial Number */ + { + char data[512]; + char *wwn; + uint32_t len; + + memset(data, 0, sizeof(data)); + + data[1] = 0x80; + + wwn = tcmu_cfgfs_dev_get_wwn(dev); + if (!wwn) + return TCMU_STS_HW_ERR; + + /* + * The maximum length of the unit_serial has limited + * to 254 Bytes in kernel, so here limit to 256 Bytes + * will be enough. + */ + len = snprintf(&data[4], 256, "%s", wwn); + data[3] = len + 1; + + tcmu_memcpy_into_iovec(iovec, iov_cnt, data, sizeof(data)); + + free(wwn); + return TCMU_STS_OK; + } + break; + case 0x83: /* Device identification */ + { + char data[512]; + char *ptr, *p, *wwn; + size_t len, used = 0; + uint16_t *tot_len = (uint16_t*) &data[2]; + bool next; + int i; + + memset(data, 0, sizeof(data)); + + data[1] = 0x83; + + wwn = tcmu_cfgfs_dev_get_wwn(dev); + if (!wwn) + return TCMU_STS_HW_ERR; + + ptr = &data[4]; + + /* 1/5: T10 Vendor id */ + ptr[0] = 2; /* code set: ASCII */ + ptr[1] = 1; /* identifier: T10 vendor id */ + memcpy(&ptr[4], "LIO-ORG ", 8); + len = snprintf(&ptr[12], sizeof(data) - 16, "%s", wwn); + + ptr[3] = 8 + len + 1; + used += (uint8_t)ptr[3] + 4; + ptr += used; + + /* 2/5: NAA binary */ + ptr[0] = 1; /* code set: binary */ + ptr[1] = 3; /* identifier: NAA */ + ptr[3] = 16; /* body length for naa registered extended format */ + + /* + * Set type 6 and use OpenFabrics IEEE Company ID: 00 14 05 + */ + ptr[4] = 0x60; + ptr[5] = 0x01; + ptr[6] = 0x40; + ptr[7] = 0x50; + + /* + * Fill in the rest with a binary representation of WWN + * + * This implementation only uses a nibble out of every byte of + * WWN, but this is what the kernel does, and it's nice for our + * values to match. 
+ */ + next = true; + for (p = wwn, i = 7; *p && i < 20; p++) { + uint8_t val; + + if (!char_to_hex(&val, *p)) + continue; + + if (next) { + next = false; + ptr[i++] |= val; + } else { + next = true; + ptr[i] = val << 4; + } + } + + used += 20; + ptr += 20; + + /* 3/6: Vendor specific */ + ptr[0] = 2; /* code set: ASCII */ + ptr[1] = 0; /* identifier: vendor-specific */ + + len = snprintf(&ptr[4], sizeof(data) - used - 4, "%s", tcmu_dev_get_cfgstring(dev)); + ptr[3] = len + 1; + + used += (uint8_t)ptr[3] + 4; + ptr += (uint8_t)ptr[3] + 4; + + /* Done with descriptor list */ + + *tot_len = htobe16(used); + + tcmu_memcpy_into_iovec(iovec, iov_cnt, data, used + 4); + + free(wwn); + wwn = NULL; + + return TCMU_STS_OK; + } + break; + case 0xb0: /* Block Limits */ + { + char data[64]; + uint32_t max_xfer_length; + uint16_t val16; + uint32_t val32; + + memset(data, 0, sizeof(data)); + + data[1] = 0xb0; + + val16 = htobe16(0x3c); + memcpy(&data[2], &val16, 2); + + /* WSNZ = 1: the device server won't support a value of zero + * in the NUMBER OF LOGICAL BLOCKS field in the WRITE SAME + * command CDBs + */ + data[4] = 0x01; + + /* + * Daemons like runner may override the user requested + * value due to device specific limits. + */ + max_xfer_length = tcmu_dev_get_max_xfer_len(dev); + + val32 = htobe32(max_xfer_length); + /* Max xfer length */ + memcpy(&data[8], &val32, 4); + /* Optimal xfer length */ + memcpy(&data[12], &val32, 4); + + tcmu_memcpy_into_iovec(iovec, iov_cnt, data, sizeof(data)); + + return TCMU_STS_OK; + } + break; + case 0xb1: /* Block Device Characteristics VPD page */ + { + char data[64]; + uint16_t val16; + + memset(data, 0, sizeof(data)); + + /* + * From spc-5 Revision 14, section 6.7.2 Standard INQUIRY data + * set the devive type to Direct access block device. 
+ */ + data[0] = 0x00; + + /* PAGE CODE (B1h) */ + data[1] = 0xb1; + + /* PAGE LENGTH (003Ch)*/ + val16 = htobe16(0x003c); + memcpy(&data[2], &val16, 2); + + if (tcmu_dev_get_solid_state_media(dev)) { + val16 = htobe16(0x0001); + memcpy(&data[4], &val16, 2); + } + + tcmu_memcpy_into_iovec(iovec, iov_cnt, data, sizeof(data)); + return TCMU_STS_OK; + } + break; + case 0xb2: /* Logical Block Provisioning VPD page */ + { + char data[64]; + uint16_t val16; + + memset(data, 0, sizeof(data)); + + /* + * From spc-5 Revision 14, section 6.7.2 Standard INQUIRY data + * set the device type to Direct access block device. + */ + data[0] = 0x00; + + /* PAGE CODE (B2h) */ + data[1] = 0xb2; + + /* + * PAGE LENGTH field: PROVISIONING GROUP DESCRIPTOR field will be + * not present. + */ + val16 = htobe16(0x0004); + memcpy(&data[2], &val16, 2); + + /* + * The logical block provisioning read zeros (LBPRZ) field. + * + * The logical block data represented by unmapped LBAs is set to zeros + */ + data[5] = 0x04; + + tcmu_memcpy_into_iovec(iovec, iov_cnt, data, sizeof(data)); + return TCMU_STS_OK; + } + break; + default: + error_report("Vital product data page code 0x%x not support\n", + cdb[2]); + return TCMU_STS_INVALID_CDB; + } +} + +/* + * Emulate INQUIRY(0x12) + */ +int tcmu_emulate_inquiry( + struct tcmu_device *dev, + uint8_t *cdb, + struct iovec *iovec, + size_t iov_cnt) +{ + if (!(cdb[1] & 0x01)) { + if (!cdb[2]) + return tcmu_emulate_std_inquiry(cdb, iovec, + iov_cnt); + else + return TCMU_STS_INVALID_CDB; + } else { + return tcmu_emulate_evpd_inquiry(dev, cdb, iovec, iov_cnt); + } +} + +int tcmu_emulate_test_unit_ready( + uint8_t *cdb, + struct iovec *iovec, + size_t iov_cnt) +{ + return TCMU_STS_OK; +} + +int tcmu_emulate_read_capacity_10( + uint64_t num_lbas, + uint32_t block_size, + uint8_t *cdb, + struct iovec *iovec, + size_t iov_cnt) +{ + uint8_t buf[8]; + uint32_t val32; + + memset(buf, 0, sizeof(buf)); + + if (num_lbas < 0x100000000ULL) { + // Return the LBA of the last 
logical block, so subtract 1. + val32 = htobe32(num_lbas-1); + } else { + // This lets the initiator know that he needs to use + // Read Capacity(16). + val32 = 0xffffffff; + } + + memcpy(&buf[0], &val32, 4); + + val32 = htobe32(block_size); + memcpy(&buf[4], &val32, 4); + + /* all else is zero */ + + tcmu_memcpy_into_iovec(iovec, iov_cnt, buf, sizeof(buf)); + + return TCMU_STS_OK; +} + +int tcmu_emulate_read_capacity_16( + uint64_t num_lbas, + uint32_t block_size, + uint8_t *cdb, + struct iovec *iovec, + size_t iov_cnt) +{ + uint8_t buf[32]; + uint64_t val64; + uint32_t val32; + + memset(buf, 0, sizeof(buf)); + + // Return the LBA of the last logical block, so subtract 1. + val64 = htobe64(num_lbas-1); + memcpy(&buf[0], &val64, 8); + + val32 = htobe32(block_size); + memcpy(&buf[8], &val32, 4); + + /* + * Logical Block Provisioning Management Enabled (LBPME) bit + * + * The LBPME bit sets to one and then the logical unit implements + * logical block provisioning management + */ + buf[14] = 0x80; + + /* + * The logical block provisioning read zeros (LBPRZ) bit shall be + * set to one if the LBPRZ field is set to xx1b in VPD B2. The + * LBPRZ bit shall be set to zero if the LBPRZ field is not set + * to xx1b. + */ + buf[14] |= 0x40; + + /* all else is zero */ + + tcmu_memcpy_into_iovec(iovec, iov_cnt, buf, sizeof(buf)); + + return TCMU_STS_OK; +} + +static void copy_to_response_buf(uint8_t *to_buf, size_t to_len, + uint8_t *from_buf, size_t from_len) +{ + if (!to_buf) + return; + /* + * SPC 4r37: 4.3.5.6 Allocation length: + * + * The device server shall terminate transfers to the Data-In Buffer + * when the number of bytes or blocks specified by the ALLOCATION + * LENGTH field have been transferred or when all available data + * have been transferred, whichever is less. + */ + memcpy(to_buf, from_buf, to_len > from_len ? 
from_len : to_len); +} + +static int handle_rwrecovery_page(struct tcmu_device *dev, uint8_t *ret_buf, + size_t ret_buf_len) +{ + uint8_t buf[12]; + + memset(buf, 0, sizeof(buf)); + buf[0] = 0x1; + buf[1] = 0xa; + + copy_to_response_buf(ret_buf, ret_buf_len, buf, 12); + return 12; +} + +static int handle_cache_page(struct tcmu_device *dev, uint8_t *ret_buf, + size_t ret_buf_len) +{ + uint8_t buf[20]; + + memset(buf, 0, sizeof(buf)); + buf[0] = 0x8; + buf[1] = 0x12; + + /* + * If device supports a writeback cache then set writeback + * cache enable (WCE) + */ + if (tcmu_dev_get_write_cache_enabled(dev)) + buf[2] = 0x4; + + copy_to_response_buf(ret_buf, ret_buf_len, buf, 20); + return 20; +} + +static int handle_control_page(struct tcmu_device *dev, uint8_t *ret_buf, + size_t ret_buf_len) +{ + uint8_t buf[12]; + + memset(buf, 0, sizeof(buf)); + buf[0] = 0x0a; + buf[1] = 0x0a; + + /* From spc4r31, section 7.5.7 Control mode Page + * + * GLTSD = 1: because we don't implicitly save log parameters + * + * A global logging target save disable (GLTSD) bit set to + * zero specifies that the logical unit implicitly saves, at + * vendor specific intervals, each log parameter in which the + * TSD bit (see 7.3) is set to zero. A GLTSD bit set to one + * specifies that the logical unit shall not implicitly save + * any log parameters. + */ + buf[2] = 0x02; + + /* From spc4r31, section 7.5.7 Control mode Page + * + * TAS = 1: Currently not settable by tcmu. Using the LIO default + * + * A task aborted status (TAS) bit set to zero specifies that + * aborted commands shall be terminated by the device server + * without any response to the application client. 
A TAS bit + * set to one specifies that commands aborted by the actions + * of an I_T nexus other than the I_T nexus on which the command + * was received shall be completed with TASK ABORTED status + */ + buf[5] = 0x40; + + /* From spc4r31, section 7.5.7 Control mode Page + * + * BUSY TIMEOUT PERIOD: Currently is unlimited + * + * The BUSY TIMEOUT PERIOD field specifies the maximum time, in + * 100 milliseconds increments, that the application client allows + * for the device server to return BUSY status for unanticipated + * conditions that are not a routine part of commands from the + * application client. This value may be rounded down as defined + * in 5.4(the Parameter rounding section). + * + * A 0000h value in this field is undefined by this standard. + * An FFFFh value in this field is defined as an unlimited period. + */ + buf[8] = 0xff; + buf[9] = 0xff; + + copy_to_response_buf(ret_buf, ret_buf_len, buf, 12); + return 12; +} + + +static struct mode_sense_handler { + uint8_t page; + uint8_t subpage; + int (*get)(struct tcmu_device *dev, uint8_t *buf, size_t buf_len); +} modesense_handlers[] = { + {0x1, 0, handle_rwrecovery_page}, + {0x8, 0, handle_cache_page}, + {0xa, 0, handle_control_page}, +}; + +static ssize_t handle_mode_sense(struct tcmu_device *dev, + struct mode_sense_handler *handler, + uint8_t **buf, size_t alloc_len, + size_t *used_len, bool sense_ten) +{ + int ret; + + ret = handler->get(dev, *buf, alloc_len - *used_len); + + if (!sense_ten && (*used_len + ret >= 255)) + return -EINVAL; + + /* + * SPC 4r37: 4.3.5.6 Allocation length: + * + * If the information being transferred to the Data-In Buffer includes + * fields containing counts of the number of bytes in some or all of + * the data (e.g., the PARAMETER DATA LENGTH field, the PAGE LENGTH + * field, the DESCRIPTOR LENGTH field, the AVAILABLE DATA field), + * then the contents of these fields shall not be altered to reflect + * the truncation, if any, that results from an insufficient + * 
ALLOCATION LENGTH value + */ + /* + * Setup the buffer so to still loop over the handlers, but just + * increment the used_len so we can return the + * final value. + */ + if (*buf && (*used_len + ret >= alloc_len)) + *buf = NULL; + + *used_len += ret; + if (*buf) + *buf += ret; + return ret; +} + +/* + * Handle MODE_SENSE(6) and MODE_SENSE(10). + * + * For TYPE_DISK only. + */ +int tcmu_emulate_mode_sense( + struct tcmu_device *dev, + uint8_t *cdb, + struct iovec *iovec, + size_t iov_cnt) +{ + bool sense_ten = (cdb[0] == MODE_SENSE_10); + uint8_t page_code = cdb[2] & 0x3f; + uint8_t subpage_code = cdb[3]; + size_t alloc_len = tcmu_dev_get_max_xfer_len(dev); + int i; + int ret; + size_t used_len; + uint8_t *buf; + uint8_t *orig_buf = NULL; + + if (!alloc_len) + return TCMU_STS_OK; + + /* Mode parameter header. Mode data length filled in at the end. */ + used_len = sense_ten ? 8 : 4; + if (used_len > alloc_len) + goto fail; + + buf = calloc(1, alloc_len); + if (!buf) + return TCMU_STS_NO_RESOURCE; + + orig_buf = buf; + buf += used_len; + + /* Don't fill in device-specific parameter */ + /* This helper fn doesn't support sw write protect (SWP) */ + + /* Don't report block descriptors */ + + if (page_code == 0x3f) { + for (i = 0; i < ARRAY_SIZE(modesense_handlers); i++) { + ret = handle_mode_sense(dev, &modesense_handlers[i], + &buf, alloc_len, &used_len, + sense_ten); + if (ret < 0) + goto free_buf; + } + } else { + ret = 0; + + for (i = 0; i < ARRAY_SIZE(modesense_handlers); i++) { + if (page_code == modesense_handlers[i].page && + subpage_code == modesense_handlers[i].subpage) { + ret = handle_mode_sense(dev, + &modesense_handlers[i], + &buf, alloc_len, + &used_len, sense_ten); + break; + } + } + + if (ret <= 0) + goto free_buf; + } + + if (sense_ten) { + uint16_t *ptr = (uint16_t*) orig_buf; + *ptr = htobe16(used_len - 2); + } + else { + orig_buf[0] = used_len - 1; + } + + tcmu_memcpy_into_iovec(iovec, iov_cnt, orig_buf, alloc_len); + free(orig_buf); + return 
TCMU_STS_OK; + +free_buf: + free(orig_buf); +fail: + return TCMU_STS_INVALID_CDB; +} + +/* + * Handle MODE_SELECT(6) and MODE_SELECT(10). + * + * For TYPE_DISK only. + */ +int tcmu_emulate_mode_select( + struct tcmu_device *dev, + uint8_t *cdb, + struct iovec *iovec, + size_t iov_cnt) +{ + bool select_ten = (cdb[0] == MODE_SELECT_10); + uint8_t page_code = cdb[2] & 0x3f; + uint8_t subpage_code = cdb[3]; + size_t alloc_len = tcmu_dev_get_max_xfer_len(dev); + int i; + int ret = 0; + size_t hdr_len = select_ten ? 8 : 4; + uint8_t buf[512]; + uint8_t in_buf[512]; + bool got_sense = false; + + if (!alloc_len) + return TCMU_STS_OK; + + if (tcmu_memcpy_from_iovec(in_buf, sizeof(in_buf), iovec, iov_cnt) >= sizeof(in_buf)) + return TCMU_STS_INVALID_PARAM_LIST_LEN; + + /* Abort if !pf or sp */ + if (!(cdb[1] & 0x10) || (cdb[1] & 0x01)) + return TCMU_STS_INVALID_CDB; + + memset(buf, 0, sizeof(buf)); + for (i = 0; i < ARRAY_SIZE(modesense_handlers); i++) { + if (page_code == modesense_handlers[i].page + && subpage_code == modesense_handlers[i].subpage) { + ret = modesense_handlers[i].get(dev, &buf[hdr_len], + sizeof(buf) - hdr_len); + if (ret <= 0) + return TCMU_STS_INVALID_CDB; + + if (!select_ten && (hdr_len + ret >= 255)) + return TCMU_STS_INVALID_CDB; + + got_sense = true; + break; + } + } + + if (!got_sense) + return TCMU_STS_INVALID_CDB; + + if (alloc_len < (hdr_len + ret)) + return TCMU_STS_INVALID_PARAM_LIST_LEN; + + /* Verify what was selected is identical to what sense returns, since we + don't support actually setting anything. 
*/ + if (memcmp(&buf[hdr_len], &in_buf[hdr_len], ret)) + return TCMU_STS_INVALID_PARAM_LIST; + + return TCMU_STS_OK; +} + +int tcmu_emulate_start_stop(struct tcmu_device *dev, uint8_t *cdb) +{ + if ((cdb[4] >> 4) & 0xf) + return TCMU_STS_INVALID_CDB; + + /* Currently, we don't allow ejecting the medium, so we're + * ignoring the FBO_PREV_EJECT flag, but it may turn out that + * initiators do not handle this well, so we may have to change + * this behavior. + */ + + if (!(cdb[4] & 0x01)) + return TCMU_STS_INVALID_CDB; + + return TCMU_STS_OK; +} diff --git a/tcmu/helper.h b/tcmu/helper.h new file mode 100644 index 0000000..bbbc2be --- /dev/null +++ b/tcmu/helper.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2014 Red Hat, Inc. + * + * This file is licensed to you under your choice of the GNU Lesser + * General Public License, version 2.1 or any later version (LGPLv2.1 or + * later), or the Apache License 2.0. + */ + +/* + * APIs for both libtcmu users and tcmu-runner plugins to use. + */ + +#ifndef __TCMU_HELPER_H +#define __TCMU_HELPER_H + +#include + +/* Basic implementations of mandatory SCSI commands */ +int tcmu_emulate_inquiry(struct tcmu_device *dev, uint8_t *cdb, struct iovec *iovec, size_t iov_cnt); +int tcmu_emulate_start_stop(struct tcmu_device *dev, uint8_t *cdb); +int tcmu_emulate_test_unit_ready(uint8_t *cdb, struct iovec *iovec, size_t iov_cnt); +int tcmu_emulate_read_capacity_10(uint64_t num_lbas, uint32_t block_size, uint8_t *cdb, + struct iovec *iovec, size_t iov_cnt); +int tcmu_emulate_read_capacity_16(uint64_t num_lbas, uint32_t block_size, uint8_t *cdb, + struct iovec *iovec, size_t iov_cnt); +int tcmu_emulate_mode_sense(struct tcmu_device *dev, uint8_t *cdb, + struct iovec *iovec, size_t iov_cnt); +int tcmu_emulate_mode_select(struct tcmu_device *dev, uint8_t *cdb, + struct iovec *iovec, size_t iov_cnt); + +#endif diff --git a/tcmu/tcmu.c b/tcmu/tcmu.c new file mode 100644 index 0000000..70b9a91 --- /dev/null +++ b/tcmu/tcmu.c @@ -0,0 +1,598 @@ +/* + 
* A TCMU userspace handler for QEMU block drivers. + * + * Copyright (C) 2016 Red Hat, Inc. + * + * Authors: + * Fam Zheng + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; under version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include "qemu/osdep.h" +#include "libtcmu.h" +#include "helper.h" +#include "qapi/qmp/qerror.h" +#include "qemu/error-report.h" +#include "sysemu/block-backend.h" +#include "sysemu/blockdev.h" +#include "block/aio.h" +#include "block/qdict.h" +#include "scsi/constants.h" +#include "tcmu/tcmu.h" +#include "qemu/main-loop.h" +#include "qemu/option.h" +#include "qapi/qapi-commands.h" +#include "qapi/qmp/qstring.h" +#include "qapi/qmp/qdict.h" +#include "qapi/error.h" + +#include "qemu/compiler.h" +#include "trace.h" + +typedef struct TCMUExport TCMUExport; + +struct TCMUExport { + BlockBackend *blk; + struct tcmu_device *tcmu_dev; + bool writable; + QLIST_ENTRY(TCMUExport) next; +}; + +typedef struct { + struct tcmulib_context *tcmulib_ctx; +} TCMUHandlerState; + +static QLIST_HEAD(, TCMUExport) tcmu_exports = + QLIST_HEAD_INITIALIZER(tcmu_exports); + +static TCMUHandlerState *handler_state; + +/* This's temporary, will use scsi/utils.c code */ +#define ASCQ_INVALID_FIELD_IN_CDB 0x2400 + +typedef struct { + struct tcmulib_cmd *cmd; + TCMUExport *exp; + QEMUIOVector *qiov; +} TCMURequest; + +static void qemu_tcmu_aio_cb(void *opaque, int ret) +{ + TCMURequest *req = opaque; + + trace_qemu_tcmu_aio_cb(); + tcmulib_command_complete(req->exp->tcmu_dev, req->cmd, + ret 
? CHECK_CONDITION : GOOD); + tcmulib_processing_complete(req->exp->tcmu_dev); + g_free(req->qiov); + g_free(req); +} + +static inline TCMURequest *qemu_tcmu_req_new(TCMUExport *exp, + struct tcmulib_cmd *cmd, + QEMUIOVector *qiov) +{ + TCMURequest *req = g_new(TCMURequest, 1); + *req = (TCMURequest) { + .exp = exp, + .cmd = cmd, + .qiov = qiov, + }; + return req; +} + +static int qemu_tcmu_handle_cmd(TCMUExport *exp, struct tcmulib_cmd *cmd) +{ + + uint8_t *cdb = cmd->cdb; + /* TODO: block size? */ + uint64_t offset = tcmu_cdb_get_lba(cdb) << BDRV_SECTOR_BITS; + QEMUIOVector *qiov; + + trace_qemu_tcmu_handle_cmd(cdb[0]); + switch (cdb[0]) { + case INQUIRY: + return tcmu_emulate_inquiry(exp->tcmu_dev, cdb, + cmd->iovec, cmd->iov_cnt); + case TEST_UNIT_READY: + return tcmu_emulate_test_unit_ready(cdb, cmd->iovec, cmd->iov_cnt); + case SERVICE_ACTION_IN_16: + if (cdb[1] == SAI_READ_CAPACITY_16) { + return tcmu_emulate_read_capacity_16(blk_getlength(exp->blk) / 512, + 512, + cmd->cdb, cmd->iovec, + cmd->iov_cnt); + } else { + return TCMU_STS_NOT_HANDLED; + } + case MODE_SENSE: + case MODE_SENSE_10: + return tcmu_emulate_mode_sense(exp->tcmu_dev, cdb, cmd->iovec, + cmd->iov_cnt); + case MODE_SELECT: + case MODE_SELECT_10: + return tcmu_emulate_mode_select(exp->tcmu_dev, cdb, cmd->iovec, + cmd->iov_cnt); + case SYNCHRONIZE_CACHE: + case SYNCHRONIZE_CACHE_16: + if (cdb[1] & 0x2) { + return tcmu_sense_set_data(cmd->sense_buf, ILLEGAL_REQUEST, + ASCQ_INVALID_FIELD_IN_CDB); + } else { + blk_aio_flush(exp->blk, qemu_tcmu_aio_cb, + qemu_tcmu_req_new(exp, cmd, NULL)); + return TCMU_STS_ASYNC_HANDLED; + } + break; + case READ_6: + case READ_10: + case READ_12: + case READ_16: + qiov = g_new(QEMUIOVector, 1); + qemu_iovec_init_external(qiov, cmd->iovec, cmd->iov_cnt); + trace_qemu_tcmu_handle_cmd_read(offset); + blk_aio_preadv(exp->blk, offset, qiov, 0, qemu_tcmu_aio_cb, + qemu_tcmu_req_new(exp, cmd, qiov)); + return TCMU_STS_ASYNC_HANDLED; + + case WRITE_6: + case WRITE_10: + 
case WRITE_12: + case WRITE_16: + qiov = g_new(QEMUIOVector, 1); + qemu_iovec_init_external(qiov, cmd->iovec, cmd->iov_cnt); + trace_qemu_tcmu_handle_cmd_write(offset); + blk_aio_pwritev(exp->blk, offset, qiov, 0, qemu_tcmu_aio_cb, + qemu_tcmu_req_new(exp, cmd, qiov)); + return TCMU_STS_ASYNC_HANDLED; + + default: + trace_qemu_tcmu_handle_cmd_unknown_cmd(cdb[0]); + return TCMU_STS_NOT_HANDLED; + } +} + +static void qemu_tcmu_dev_event_handler(void *opaque) +{ + TCMUExport *exp = opaque; + struct tcmulib_cmd *cmd; + struct tcmu_device *dev = exp->tcmu_dev; + + tcmulib_processing_start(dev); + + while ((cmd = tcmulib_get_next_command(dev)) != NULL) { + int ret = qemu_tcmu_handle_cmd(exp, cmd); + if (ret != TCMU_STS_ASYNC_HANDLED) { + tcmulib_command_complete(dev, cmd, ret); + } + } + + tcmulib_processing_complete(dev); +} + +static TCMUExport *tcmu_export_lookup(const BlockBackend *blk) +{ + TCMUExport *exp; + + QLIST_FOREACH(exp, &tcmu_exports, next) { + if (exp->blk == blk) { + return exp; + } + } + return NULL; +} +static TCMUExport *parse_cfgstr(const char *cfgstr, + Error **errp); +static bool check_cfgstr(const char *cfgstr, + Error **errp); + +QemuOptsList qemu_tcmu_common_export_opts = { + .name = "export", + .head = QTAILQ_HEAD_INITIALIZER(qemu_tcmu_common_export_opts.head), + .desc = { + { + .name = "snapshot", + .type = QEMU_OPT_BOOL, + .help = "enable/disable snapshot mode", + },{ + .name = "aio", + .type = QEMU_OPT_STRING, + .help = "host AIO implementation (threads, native)", + },{ + .name = "format", + .type = QEMU_OPT_STRING, + .help = "disk format (raw, qcow2, ...)", + },{ + .name = "file", + .type = QEMU_OPT_STRING, + .help = "file name", + }, + { /* end of list */ } + }, +}; + +QemuOptsList qemu_tcmu_export_opts = { + .name = "export", + .head = QTAILQ_HEAD_INITIALIZER(qemu_tcmu_export_opts.head), + .desc = { + /* no elements => accept any params */ + { /* end of list */ } + }, +}; + +int export_init_func(void *opaque, QemuOpts *all_opts, Error 
**errp)
+{
+    /*
+     * Build one TCMU export from the options in all_opts: open the image
+     * with blk_new_open(), register the BlockBackend with the monitor under
+     * the options' id and create the export with tcmu_export_new().
+     * Errors are reported with error_report*(); errp is not written.
+     * Returns 0 on success and -1 on failure.
+     */
+    int flags = BDRV_O_RDWR;
+    const char *buf;
+    int ret = 0;
+    /*
+     * Only assigned by bdrv_parse_cache_mode() when "cache" is given, but
+     * read unconditionally below, so it needs a defined default.
+     * NOTE(review): false (write cache enabled) is assumed to be the
+     * intended default - confirm.
+     */
+    bool writethrough = false;
+    BlockBackend *blk;
+    int snapshot = 0;
+    Error *local_err = NULL;
+    QemuOpts *common_opts;
+    const char *id;
+    const char *aio;
+    const char *value;
+    QDict *bs_opts;
+    bool read_only = false;
+    const char *file;
+    TCMUExport *exp;
+
+    value = qemu_opt_get(all_opts, "cache");
+    if (value) {
+        if (bdrv_parse_cache_mode(value, &flags, &writethrough) != 0) {
+            error_report("invalid cache option");
+            ret = -1;
+            goto err_too_early;
+        }
+        /* Specific options take precedence */
+        if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_DIRECT)) {
+            qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_DIRECT,
+                              !!(flags & BDRV_O_NOCACHE), &error_abort);
+        }
+        if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_NO_FLUSH)) {
+            qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_NO_FLUSH,
+                              !!(flags & BDRV_O_NO_FLUSH), &error_abort);
+        }
+        qemu_opt_unset(all_opts, "cache");
+    }
+
+    bs_opts = qdict_new();
+    /* all_opts->id also copied into one option in bs_opts */
+    qemu_opts_to_qdict(all_opts, bs_opts);
+
+    id = qdict_get_try_str(bs_opts, "id");
+    common_opts = qemu_opts_create(&qemu_tcmu_common_export_opts, id, 1,
+                                   &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        ret = -1;
+        goto err_no_opts;
+    }
+
+    trace_export_init_func();
+
+    /* Move the options this function handles itself out of bs_opts */
+    qemu_opts_absorb_qdict(common_opts, bs_opts, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        ret = -1;
+        goto early_err;
+    }
+
+    /* "id" points into bs_opts and must not be used after this deletion */
+    if (id) {
+        qdict_del(bs_opts, "id");
+    }
+
+    if ((aio = qemu_opt_get(common_opts, "aio")) != NULL) {
+        if (!strcmp(aio, "native")) {
+            flags |= BDRV_O_NATIVE_AIO;
+        } else if (!strcmp(aio, "threads")) {
+            /* this is the default */
+        } else {
+            error_report("invalid aio option");
+            ret = -1;
+            goto early_err;
+        }
+    }
+
+    if ((buf = qemu_opt_get(common_opts, "format")) != NULL) {
+        if (qdict_haskey(bs_opts, "driver")) {
+            error_report("Cannot specify both 'driver' and 'format'");
+            ret = -1;
+            goto early_err;
+        }
+        qdict_put_str(bs_opts, "driver", buf);
+    }
+
+    snapshot = qemu_opt_get_bool(common_opts, "snapshot", 0);
+    if (snapshot) {
+        flags |= BDRV_O_SNAPSHOT;
+    }
+
+    read_only = qemu_opt_get_bool(common_opts, BDRV_OPT_READ_ONLY, false);
+    if (read_only) {
+        flags &= ~BDRV_O_RDWR;
+    }
+
+    /* bdrv_open() defaults to the values in bdrv_flags (for compatibility
+     * with other callers) rather than what we want as the real defaults
+     * Apply the defaults here instead. */
+    qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_DIRECT, "off");
+    qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, "off");
+    qdict_set_default_str(bs_opts, BDRV_OPT_READ_ONLY,
+                          read_only ? "on" : "off");
+
+    /* if (qemu_opts_id(all_opts) == NULL) */
+
+    file = qemu_opt_get(common_opts, "file");
+    blk = blk_new_open(file, NULL, bs_opts, flags, &local_err);
+    /* bs_opts must not be released by the cleanup labels past this point */
+    bs_opts = NULL;
+    if (!blk) {
+        error_report_err(local_err);
+        ret = -1;
+        goto err_no_bs_opts;
+    }
+
+    blk_set_enable_write_cache(blk, !writethrough);
+
+    id = qemu_opts_id(common_opts);
+    if (!monitor_add_blk(blk, id, &local_err)) {
+        error_report_err(local_err);
+        blk_unref(blk);
+        ret = -1;
+        goto err_no_bs_opts;
+    }
+
+    exp = tcmu_export_new(blk, flags & BDRV_O_RDWR, &local_err);
+    if (!exp) {
+        error_reportf_err(local_err, "Failed to create export: ");
+        ret = -1;
+        monitor_remove_blk(blk);
+        /* Drop the reference from blk_new_open(), as the
+         * monitor_add_blk() failure path above does. */
+        blk_unref(blk);
+    }
+
+err_no_bs_opts:
+early_err:
+    qemu_opts_del(common_opts);
+err_no_opts:
+    qobject_unref(bs_opts);
+err_too_early:
+    return ret;
+}
+
+/*
+ * check_config callback of qemu_tcmu_handler.
+ *
+ * Returns true when check_cfgstr() accepts cfgstr.  On rejection returns
+ * false; *reason is set to a strdup()ed copy of the error message when
+ * check_cfgstr() supplied one and is left untouched otherwise.
+ */
+static bool qemu_tcmu_check_config(const char *cfgstr, char **reason)
+{
+    Error *local_err = NULL;
+
+    if (check_cfgstr(cfgstr, &local_err)) {
+        return true;
+    }
+
+    /* A rejection without an Error object is still a rejection */
+    if (local_err) {
+        *reason = strdup(error_get_pretty(local_err));
+        error_free(local_err);
+    }
+    return false;
+}
+
+static int qemu_tcmu_added(struct tcmu_device *dev)
+{
+    TCMUExport *exp;
+    const char *cfgstr = tcmu_dev_get_cfgstring(dev);
+    Error *local_err = NULL;
+
+    exp = parse_cfgstr(cfgstr, &local_err);
+    if (!exp) {
+        return -1;
+    }
+    exp->tcmu_dev = dev;
+    tcmu_dev_set_private(dev,
exp);
+    aio_set_fd_handler(blk_get_aio_context(exp->blk),
+                       tcmu_dev_get_fd(dev),
+                       true, qemu_tcmu_dev_event_handler,
+                       NULL, NULL, exp);
+    return 0;
+}
+
+/*
+ * Tear down an export: remove its fd handler, unregister the BlockBackend
+ * from the monitor, drop one reference on it, unlink the export from
+ * tcmu_exports and free it.
+ */
+static void tcmu_export_close(TCMUExport *exp)
+{
+    aio_set_fd_handler(blk_get_aio_context(exp->blk),
+                       tcmu_dev_get_fd(exp->tcmu_dev),
+                       false, NULL,
+                       NULL, NULL, NULL);
+    monitor_remove_blk(exp->blk);
+    blk_unref(exp->blk);
+    QLIST_REMOVE(exp, next);
+    g_free(exp);
+}
+
+/* "removed" callback of qemu_tcmu_handler: close the export stored as the
+ * device's private data, if there is one. */
+static void qemu_tcmu_removed(struct tcmu_device *dev)
+{
+    TCMUExport *exp = tcmu_dev_get_private(dev);
+
+    if (exp) {
+        tcmu_export_close(exp);
+    }
+}
+
+/* Read handler for the tcmulib master fd (installed by qemu_tcmu_start) */
+static void qemu_tcmu_master_read(void *opaque)
+{
+    TCMUHandlerState *s = opaque;
+
+    trace_qemu_tcmu_master_read();
+    tcmulib_master_fd_ready(s->tcmulib_ctx);
+}
+
+static struct tcmulib_handler qemu_tcmu_handler = {
+    .name = "Handler for QEMU block devices",
+    .subtype = NULL, /* Dynamically generated when starting. */
+    .cfg_desc = "Format: device=",
+    .added = qemu_tcmu_added,
+    .removed = qemu_tcmu_removed,
+    .check_config = qemu_tcmu_check_config,
+};
+
+/*
+ * Validate a device cfgstring.
+ *
+ * The string must start with the handler subtype followed by "/@".  When
+ * the remainder contains no further '@' it is taken as a block backend
+ * name, and both the backend and a TCMU export for it must already exist.
+ *
+ * Returns true when the string is acceptable.  Returns false with an error
+ * stored in errp on every failure path.
+ */
+static bool check_cfgstr(const char *cfgstr,
+                         Error **errp)
+{
+    BlockBackend *blk;
+    const char *dev_str, *id, *device;
+    const char *pr;
+    const char *subtype = qemu_tcmu_handler.subtype;
+    size_t subtype_len;
+    TCMUExport *exp;
+
+    if (!subtype) {
+        error_setg(errp, "TCMU Handler not started");
+        /* subtype is NULL: must not reach strlen() below */
+        return false;
+    }
+    subtype_len = strlen(subtype);
+    if (strncmp(cfgstr, subtype, subtype_len) ||
+        cfgstr[subtype_len] != '/') {
+        error_setg(errp, "TCMU: Invalid subtype in device cfgstring: %s",
+                   cfgstr);
+        return false;
+    }
+    dev_str = &cfgstr[subtype_len + 1];
+    if (dev_str[0] != '@') {
+        error_setg(errp, "TCMU: Invalid cfgstring format. Must be @");
+        return false;
+    }
+    device = &dev_str[1];
+
+    pr = strchr(device, '@');
+    if (!pr) {
+        id = device;
+        blk = blk_by_name(id);
+        if (!blk) {
+            error_setg(errp, "TCMU: Device not found: %s", id);
+            return false;
+        }
+        exp = tcmu_export_lookup(blk);
+        if (!exp) {
+            error_setg(errp, "TCMU: Device not found: %s", id);
+            return false;
+        }
+    }// TODO: else to check id?
+
+    return true;
+}
+
+/*
+ * Copy the NUL-terminated string opts into to, replacing every '@' with
+ * ','.  The caller must provide at least strlen(opts) + 1 bytes.
+ */
+static void tcmu_convert_delim(char *to, const char *opts)
+{
+    for (; *opts != '\0'; opts++, to++) {
+        *to = (*opts == '@') ? ',' : *opts;
+    }
+    *to = '\0';
+}
+
+/*
+ * Resolve a device cfgstring to its TCMUExport.
+ *
+ * After validation with check_cfgstr(), the part following "subtype/@" is
+ * either the name of an existing block backend (no further '@'), or an
+ * '@'-separated option list from which a new export is created through
+ * export_init_func().
+ *
+ * Returns the export, or NULL on failure.  Failures are reported with
+ * error_report*(); errp is left untouched because qemu_tcmu_added() does
+ * not consume it.
+ */
+static TCMUExport *parse_cfgstr(const char *cfgstr,
+                                Error **errp)
+{
+    const char *device, *id;
+    const char *subtype = qemu_tcmu_handler.subtype;
+    TCMUExport *exp = NULL;
+    QemuOpts *export_opts;
+    char *new_device;
+    Error *local_err = NULL;
+
+    /* Guarantees subtype != NULL and the "subtype/@" prefix, so the
+     * indexing below stays inside cfgstr. */
+    if (!check_cfgstr(cfgstr, &local_err)) {
+        if (local_err) {
+            error_report_err(local_err);
+        }
+        return NULL;
+    }
+    device = &cfgstr[strlen(subtype) + 2];
+
+    if (!strchr(device, '@')) {
+        /* Plain backend name: the export has to exist already */
+        return tcmu_export_lookup(blk_by_name(device));
+    }
+
+    new_device = g_malloc0(strlen(device) + 1);
+    tcmu_convert_delim(new_device, device);
+
+    /* parse new_device into an QemuOpts and link into
+       qemu_tcmu_export_opts with QemuOpts->id set while
+       without an option id in QemuOpts.
+     */
+    export_opts = qemu_opts_parse_noisily(&qemu_tcmu_export_opts,
+                                          new_device, false);
+    trace_qemu_tcmu_parse_cfgstr();
+    g_free(new_device);
+
+    if (!export_opts) {
+        return NULL;
+    }
+
+    if (!export_init_func(NULL, export_opts, NULL)) {
+        id = qemu_opts_id(export_opts);
+        if (id) {
+            exp = tcmu_export_lookup(blk_by_name(id));
+        }
+    }
+
+    /* Released on the failure path as well */
+    qemu_opts_del(export_opts);
+    return exp;
+}
+
+/*
+ * Shut the handler down and return to the "not started" state so that
+ * qemu_tcmu_start() can be called again.  Does nothing if the handler was
+ * never started.
+ */
+void qemu_tcmu_stop(void)
+{
+    if (!handler_state) {
+        return;
+    }
+    /* Remove the handler installed by qemu_tcmu_start() before the
+     * context goes away. */
+    qemu_set_fd_handler(tcmulib_get_master_fd(handler_state->tcmulib_ctx),
+                        NULL, NULL, NULL);
+    tcmulib_close(handler_state->tcmulib_ctx);
+    g_free(handler_state);
+    handler_state = NULL;
+    g_free((char *)qemu_tcmu_handler.subtype);
+    qemu_tcmu_handler.subtype = NULL;
+}
+
+/*
+ * Start the TCMU handler for the given subtype: initialize tcmulib, watch
+ * its master fd and register the handler.
+ *
+ * Sets errp if the handler is already started or tcmulib cannot be
+ * initialized.  After an initialization failure the handler is back in
+ * the "not started" state.
+ */
+void qemu_tcmu_start(const char *subtype, Error **errp)
+{
+    int fd;
+
+    trace_qemu_tcmu_start();
+    if (handler_state) {
+        error_setg(errp, "TCMU handler already started");
+        return;
+    }
+    assert(!qemu_tcmu_handler.subtype);
+    qemu_tcmu_handler.subtype = g_strdup(subtype);
+    handler_state = g_new0(TCMUHandlerState, 1);
+    handler_state->tcmulib_ctx = tcmulib_initialize(&qemu_tcmu_handler, 1);
+
+    if (!handler_state->tcmulib_ctx) {
+        error_setg(errp, "Failed to initialize tcmulib");
+        goto fail;
+    }
+    fd = tcmulib_get_master_fd(handler_state->tcmulib_ctx);
+    qemu_set_fd_handler(fd, qemu_tcmu_master_read, NULL, handler_state);
+    trace_qemu_tcmu_start_register();
+    tcmulib_register(handler_state->tcmulib_ctx);
+    return;
+
+fail:
+    g_free(handler_state);
+    handler_state = NULL;
+    /* Otherwise the assert above fires on the next start attempt */
+    g_free((char *)qemu_tcmu_handler.subtype);
+    qemu_tcmu_handler.subtype = NULL;
+}
+
+/*
+ * Create an export for blk and insert it into tcmu_exports.  The export
+ * takes its own reference on blk.
+ *
+ * Returns the new export, or NULL with errp set if blk already has one.
+ */
+TCMUExport *tcmu_export_new(BlockBackend *blk, bool writable, Error **errp)
+{
+    TCMUExport *exp;
+
+    exp = tcmu_export_lookup(blk);
+    if (exp) {
+        error_setg(errp, "Block device already added");
+        return NULL;
+    }
+    exp = g_new0(TCMUExport, 1);
+    exp->blk = blk;
+    blk_ref(blk);
+    exp->writable = writable;
+    QLIST_INSERT_HEAD(&tcmu_exports, exp, next);
+    return exp;
+}
diff --git a/tcmu/trace-events b/tcmu/trace-events
new file mode 100644
index 0000000..62ad30e
--- /dev/null
+++ b/tcmu/trace-events
@@ -0,0 +1,12 @@
+# tcmu/tcmu.c
+
+qemu_tcmu_aio_cb(void) "aio cb"
+qemu_tcmu_handle_cmd(uint8_t cdb) "handle cmd: 0x%x"
+qemu_tcmu_handle_cmd_read(uint64_t offset) "read at %"PRIu64
+qemu_tcmu_handle_cmd_write(uint64_t offset) "write at %"PRIu64
+qemu_tcmu_handle_cmd_unknown_cmd(uint8_t cdb) "unknown cmd: 0x%x"
+qemu_tcmu_master_read(void) "master read"
+qemu_tcmu_start(void) "start"
+qemu_tcmu_start_register(void) "register"
+qemu_tcmu_parse_cfgstr(void) "parse noisily"
+export_init_func(void) "parse common"
-- 
1.8.3.1