From: Alexey Dobriyan <adobriyan@gmail.com>
To: axboe@kernel.dk
Cc: fio@vger.kernel.org
Subject: [PATCH] fio: add NVMe engine
Date: Thu, 26 Mar 2020 23:44:06 +0300 [thread overview]
Message-ID: <20200326204406.GA29597@avx2> (raw)
Add simple iodepth=1 NVMe engine:
ioengine=nvme
It works via standard Linux NVMe ioctls.
It will be used for testing upcoming ZNS stuff.
Currently Linux doesn't recognize NVMe ZNS devices as zoned block
devices so zone ioctls (BLKRESETZONE et al) can't be used.
Passthrough ioctls should allow Zone Append and whatever commands
new specs bring.
Support read, write, fsync, fdatasync.
Don't support sync_file_range obviously.
Don't support trim for now, until I figure out all the qemu options and
the story behind the broken qemu trim support.
Signed-off-by: Alexey Dobriyan (SK hynix) <adobriyan@gmail.com>
---
Makefile | 3
configure | 20 +++++
engines/nvme.c | 226 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
file.h | 1
4 files changed, 250 insertions(+)
--- a/Makefile
+++ b/Makefile
@@ -163,6 +163,9 @@ endif
ifdef CONFIG_LINUX_BLKZONED
SOURCE += zbd.c
endif
+ifdef CONFIG_NVME
+ SOURCE += engines/nvme.c
+endif
ifeq ($(CONFIG_TARGET_OS), Linux)
SOURCE += diskutil.c fifo.c blktrace.c cgroup.c trim.c engines/sg.c \
--- a/configure
+++ b/configure
@@ -2397,6 +2397,22 @@ if compile_prog "" "" "linux_blkzoned"; then
fi
print_config "Zoned block device support" "$linux_blkzoned"
+##########################################
+# check for linux/nvme_ioctl.h (needed by the NVMe ioengine)
+if test "$linux_nvme" != "yes" ; then
+ linux_nvme="no"
+fi
+cat >$TMPC <<EOF
+#include <linux/nvme_ioctl.h>
+int main(void)
+{
+ return 0;
+}
+EOF
+if compile_prog "" "" "linux_nvme"; then
+ linux_nvme="yes"
+fi
+print_config "NVMe engine" "$linux_nvme"
+
##########################################
# check march=armv8-a+crc+crypto
if test "$march_armv8_a_crc_crypto" != "yes" ; then
@@ -2912,6 +2928,10 @@ if test "$libnbd" = "yes" ; then
echo "LIBNBD_CFLAGS=$libnbd_cflags" >> $config_host_mak
echo "LIBNBD_LIBS=$libnbd_libs" >> $config_host_mak
fi
+if test "$linux_nvme" = "yes" ; then
+ output_sym "CONFIG_NVME"
+fi
+
cat > $TMPC << EOF
int main(int argc, char **argv)
{
new file mode 100644
--- /dev/null
+++ b/engines/nvme.c
@@ -0,0 +1,226 @@
+/* NVMe passthrough engine. */
+#include <linux/nvme_ioctl.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include "../fio.h"
+
+/* NVMe admin command opcode used by this engine. */
+enum {
+ nvme_admin_identify = 6, /* sent from nvme_open_file() to identify the namespace */
+};
+
+/* NVMe I/O command opcodes issued by the flush, write and read helpers. */
+enum {
+ nvme_cmd_flush = 0,
+ nvme_cmd_write = 1,
+ nvme_cmd_read = 2,
+};
+
+/*
+ * One LBA format descriptor. struct nvme_id_ns embeds an array of 16 of
+ * these.
+ */
+struct nvme_lbaf {
+ __le16 ms;
+ __u8 ds; /* nvme_open_file() stores this as the LBA shift (f->lba_shift) */
+ __u8 rp;
+};
+
+/*
+ * Data buffer for the Identify Namespace admin command issued by
+ * nvme_open_file(). The member sizes add up to 4096 bytes, which is the
+ * data_len passed with that command (assuming the compiler inserts no
+ * padding -- the rsvdNN names match the running byte offsets).
+ *
+ * This engine only reads flbas and lbaf[]; the other members exist to keep
+ * the layout intact.
+ */
+struct nvme_id_ns {
+ __le64 nsze;
+ __le64 ncap;
+ __le64 nuse;
+ __u8 nsfeat;
+ __u8 nlbaf;
+ __u8 flbas; /* low 4 bits index lbaf[] (see nvme_open_file()) */
+ __u8 mc;
+ __u8 dpc;
+ __u8 dps;
+ __u8 nmic;
+ __u8 rescap;
+ __u8 fpi;
+ __u8 dlfeat;
+ __le16 nawun;
+ __le16 nawupf;
+ __le16 nacwu;
+ __le16 nabsn;
+ __le16 nabo;
+ __le16 nabspf;
+ __le16 noiob;
+ __u8 nvmcap[16];
+ __le16 npwg;
+ __le16 npwa;
+ __le16 npdg;
+ __le16 npda;
+ __le16 nows;
+ __u8 rsvd74[18];
+ __le32 anagrpid;
+ __u8 rsvd96[3];
+ __u8 nsattr;
+ __le16 nvmsetid;
+ __le16 endgid;
+ __u8 nguid[16];
+ __u8 eui64[8];
+ struct nvme_lbaf lbaf[16]; /* LBA format table, selected via flbas */
+ __u8 rsvd192[192];
+ __u8 vs[3712];
+};
+
+/* Recover the namespace ID that nvme_open_file() stashed in f->engine_data. */
+static inline uint32_t get_nsid(const struct fio_file *f)
+{
+	const uintptr_t stashed = (uintptr_t)f->engine_data;
+
+	return stashed;
+}
+
+/*
+ * Open the NVMe block device behind @f and cache what the I/O path needs:
+ * the namespace ID (stored in f->engine_data) and the data-size shift of the
+ * LBA format currently in use (stored in f->lba_shift).
+ *
+ * Returns 0 on success, -errno when open() or fstat() fails, and 1 for any
+ * other failure. On every failure path after a successful open() the
+ * descriptor is closed again and f->fd is reset to -1, so it is neither
+ * leaked nor reused.
+ */
+static int nvme_open_file(struct thread_data *td, struct fio_file *f)
+{
+	struct nvme_admin_cmd cmd;
+	struct nvme_id_ns id;
+	struct stat st;
+	int nsid;
+	int ret;
+
+	/* NVMe ioctls ignore open flags, require CAP_SYS_ADMIN only. */
+	f->fd = open(f->file_name, O_RDONLY);
+	if (f->fd < 0)
+		return -errno;
+
+	if (fstat(f->fd, &st) == -1) {
+		/* Capture errno before close() gets a chance to overwrite it. */
+		ret = -errno;
+		goto err_close;
+	}
+
+	/* All remaining failures are reported as 1. */
+	ret = 1;
+
+	if (!S_ISBLK(st.st_mode)) {
+		log_err("%s: nvme engine requires NVMe block device\n",
+			f->file_name);
+		goto err_close;
+	}
+
+	/*
+	 * Keep the result signed: ioctl() reports failure as -1, which an
+	 * unsigned variable would turn into a huge, valid-looking ID.
+	 */
+	nsid = ioctl(f->fd, NVME_IOCTL_ID);
+	if (nsid < 1) {
+		log_err("%s: ioctl NVME_IOCTL_ID\n", f->file_name);
+		goto err_close;
+	}
+
+	/* Identify Namespace */
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.opcode = nvme_admin_identify;
+	cmd.nsid = nsid;
+	cmd.addr = (uintptr_t)&id;
+	cmd.data_len = sizeof(id);
+	if (ioctl(f->fd, NVME_IOCTL_ADMIN_CMD, &cmd) != 0) {
+		log_err("%s: ioctl NVME_IOCTL_ADMIN_CMD\n", f->file_name);
+		goto err_close;
+	}
+
+	f->engine_data = (void *)(uintptr_t)nsid;
+	/* Low 4 bits of FLBAS select the active entry of the LBA format table. */
+	f->lba_shift = id.lbaf[id.flbas & 15].ds;
+	return 0;
+
+err_close:
+	close(f->fd);
+	f->fd = -1;
+	return ret;
+}
+
+/*
+ * Submit one read or write through NVME_IOCTL_SUBMIT_IO.
+ *
+ * @opcode is nvme_cmd_read or nvme_cmd_write. The transfer described by
+ * @io_u must be aligned to the LBA size in both offset and length (asserted).
+ *
+ * Returns the ioctl() result, or -1 with errno set to EINVAL when the
+ * transfer cannot be expressed in the command: the block count field is
+ * 16 bits wide and zero-based, so only 1..65536 blocks fit. Without this
+ * check a larger (or empty) transfer would silently wrap to a different
+ * length.
+ */
+static int fio_nvme_rw(struct fio_file *f, struct io_u *io_u, uint8_t opcode)
+{
+	const uint64_t lba_mask = (1ULL << f->lba_shift) - 1;
+	const uint64_t max_blocks = (uint64_t)UINT16_MAX + 1;
+	struct nvme_user_io cmd = {};
+	uint64_t nr_blocks;
+
+	assert((io_u->xfer_buflen & lba_mask) == 0);
+	assert((io_u->offset & lba_mask) == 0);
+
+	nr_blocks = io_u->xfer_buflen >> f->lba_shift;
+	if (nr_blocks < 1 || nr_blocks > max_blocks) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	cmd.opcode = opcode;
+	cmd.nblocks = nr_blocks - 1;	/* zero-based count */
+	cmd.addr = (uintptr_t)io_u->xfer_buf;
+	cmd.slba = io_u->offset >> f->lba_shift;
+	return ioctl(f->fd, NVME_IOCTL_SUBMIT_IO, &cmd);
+}
+
+/* Read io_u->xfer_buflen bytes at io_u->offset into io_u->xfer_buf. */
+static int fio_nvme_read(struct fio_file *f, struct io_u *io_u)
+{
+	return fio_nvme_rw(f, io_u, nvme_cmd_read);
+}
+
+/* Write io_u->xfer_buflen bytes from io_u->xfer_buf at io_u->offset. */
+static int fio_nvme_write(struct fio_file *f, struct io_u *io_u)
+{
+	return fio_nvme_rw(f, io_u, nvme_cmd_write);
+}
+
+/*
+ * Send a flush command for the namespace behind @f through the I/O
+ * passthrough ioctl and hand back the raw ioctl() result.
+ */
+static int fio_nvme_flush(struct fio_file *f)
+{
+	struct nvme_passthru_cmd flush = {
+		.opcode	= nvme_cmd_flush,
+		.nsid	= get_nsid(f),
+	};
+
+	return ioctl(f->fd, NVME_IOCTL_IO_CMD, &flush);
+}
+
+/*
+ * Synchronously execute one io_u and record its outcome in io_u->error.
+ *
+ * Reads, writes and (data)syncs are dispatched to the matching helper.
+ * Trim is not implemented yet and, like any other direction, fails with
+ * EINVAL.
+ *
+ * The helpers return the ioctl() result, which is translated as follows:
+ *   0   -> success (error = 0, resid = 0)
+ *   < 0 -> the ioctl itself failed; errno is stored
+ *   > 0 -> non-zero command status, not an errno; it is logged and
+ *          reported as EIO rather than being misread as an errno
+ * Flush failures go through the same translation instead of being dropped.
+ *
+ * Always returns FIO_Q_COMPLETED.
+ */
+static enum fio_q_status fio_nvme_queue(struct thread_data *td, struct io_u *io_u)
+{
+	struct fio_file *f = io_u->file;
+	int err = 0;
+	int rv;
+
+	fio_ro_check(td, io_u);
+
+	switch (io_u->ddir) {
+	case DDIR_READ:
+		/* FIXME MDTS */
+		rv = fio_nvme_read(f, io_u);
+		break;
+	case DDIR_WRITE:
+		/* FIXME MDTS */
+		rv = fio_nvme_write(f, io_u);
+		break;
+	case DDIR_SYNC:
+	case DDIR_DATASYNC:
+		rv = fio_nvme_flush(f);
+		break;
+	default:
+		/* DDIR_TRIM (FIXME: not implemented) and anything unknown. */
+		rv = 0;
+		err = EINVAL;
+		break;
+	}
+
+	if (rv < 0) {
+		/* Read errno right away, before anything else can change it. */
+		err = errno;
+	} else if (rv > 0) {
+		log_err("%s: nvme command failed, status 0x%x\n",
+			f->file_name, (unsigned int)rv);
+		err = EIO;
+	}
+
+	io_u->error = err;
+	if (!err) {
+		io_u->resid = 0;
+	} else {
+		io_u_log_error(td, io_u);
+		td_verror(td, io_u->error, "xfer");
+	}
+
+	return FIO_Q_COMPLETED;
+}
+
+/* Operations table describing the "nvme" engine to fio. */
+static struct ioengine_ops ioengine = {
+	.name		= "nvme",
+	.version	= FIO_IOOPS_VERSION,
+	.open_file	= nvme_open_file,
+	.close_file	= generic_close_file,
+	.get_file_size	= generic_get_file_size,
+	.queue		= fio_nvme_queue,
+	.flags		= FIO_SYNCIO |
+			  FIO_RAWIO |
+			  FIO_NOEXTEND,
+};
+
+/*
+ * Register the "nvme" engine's operations table with fio.
+ * NOTE(review): fio_init presumably arranges for this to run at program
+ * start -- confirm against the macro's definition.
+ */
+fio_init
+static void register_nvme_ioengine(void)
+{
+ register_ioengine(&ioengine);
+}
+
+/*
+ * Remove the "nvme" engine's operations table from fio again.
+ * NOTE(review): fio_exit presumably arranges for this to run at program
+ * exit -- confirm against the macro's definition.
+ */
+fio_exit
+static void unregister_nvme_ioengine(void)
+{
+ unregister_ioengine(&ioengine);
+}
--- a/file.h
+++ b/file.h
@@ -99,6 +99,7 @@ struct fio_file {
uint64_t real_file_size;
uint64_t file_offset;
uint64_t io_size;
+ unsigned int lba_shift;
/*
* Zoned block device information. See also zonemode=zbd.
next reply other threads:[~2020-03-26 20:44 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-03-26 20:44 Alexey Dobriyan [this message]
2020-03-26 22:05 ` [PATCH] fio: add NVMe engine Jens Axboe
2020-03-27 6:19 ` Alexey Dobriyan
2020-03-27 14:45 ` Jens Axboe
2020-03-27 0:56 ` Damien Le Moal
2020-03-27 6:14 ` Alexey Dobriyan
2020-03-27 14:25 ` Keith Busch
2020-03-27 14:26 ` Keith Busch
2020-03-27 19:06 ` Alexey Dobriyan
2020-03-27 21:05 ` Keith Busch
2020-03-27 14:47 ` Jens Axboe
2020-03-27 19:01 ` Alexey Dobriyan
2020-03-27 21:25 ` Jens Axboe
2020-03-27 21:58 ` Keith Busch
2020-03-28 13:41 ` Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200326204406.GA29597@avx2 \
--to=adobriyan@gmail.com \
--cc=axboe@kernel.dk \
--cc=fio@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.