Re: ZBC/FLEX FIO addition ideas

Flexible I/O Tester development
 help / color / mirror / Atom feed

From: Bart Van Assche <Bart.VanAssche@wdc.com>
To: "sitsofe@gmail.com" <sitsofe@gmail.com>, Kris Davis <Kris.Davis@wdc.com>
Cc: "fio@vger.kernel.org" <fio@vger.kernel.org>,
	Jason Jorgensen <jason.jorgensen@wdc.com>,
	"phillip.a.chen@seagate.com" <phillip.a.chen@seagate.com>
Subject: Re: ZBC/FLEX FIO addition ideas
Date: Thu, 15 Mar 2018 16:43:52 +0000	[thread overview]
Message-ID: <1521132231.2834.4.camel@wdc.com> (raw)
In-Reply-To: <CALjAwxgn-yxPf5noENrdbhPKS7T3BVsArSa8wG50e0r88DPr-A@mail.gmail.com>

On Thu, 2018-03-15 at 16:30 +0000, Sitsofe Wheeler wrote:
> On 15 March 2018 at 16:15, Kris Davis <Kris.Davis@wdc.com> wrote:
> > Despite the desire to have fio work with Host Managed devices independent of the engine, a Host Managed device operation is different than a traditional block device, and the kernel also has to
> > manage it differently. Thus, I would still recommend creating a new IO engine for use with Host Managed devices, we would not want the additional overhead associated with SMR to impact the
> > standard aio engine.  We have mostly used the fio SG engine along with external operations in testing of SMR.
> > 
> > Here is the way we envision a new fio engine might work with Host Managed devices:
> 
> It's definitely going to need something special. I think last time
> round (https://www.spinics.net/lists/fio/msg06646.html ) I suggested a
> profile but perhaps that won't be enough. I doubt an ioengine would be
> enough because you're going to have to interact with the next offset code
> etc. unless you're going to fake I/Os done to "wrong" regions...

Hello Sitsofe,

Adding support for ZBC drives as a profile has a significant disadvantage,
namely that the different I/O patterns (sequential read, sequential write,
random read, random write, ...) all have to be reimplemented. That's why I'm
considering adding ZBC support by modifying what get_next_block() produces.
Can you have a look at the (barely tested) patch below?

Thanks,

Bart.


diff --git a/Makefile b/Makefile
index 8f4871c63528..44dd7f3439f6 100644
--- a/Makefile
+++ b/Makefile
@@ -145,6 +145,9 @@ endif
 ifdef CONFIG_LIBPMEM
   SOURCE += engines/libpmem.c
 endif
+ifdef HAVE_LINUX_BLKZONED_H
+  SOURCE += zbc.c
+endif
 
 ifeq ($(CONFIG_TARGET_OS), Linux)
   SOURCE += diskutil.c fifo.c blktrace.c cgroup.c trim.c engines/sg.c \
diff --git a/configure b/configure
index 4442a1cc0d75..688e86201320 100755
--- a/configure
+++ b/configure
@@ -2104,6 +2104,27 @@ if compile_prog "" "" "valgrind_dev"; then
 fi
 print_config "Valgrind headers" "$valgrind_dev"
 
+##########################################
+# <linux/blkzoned.h> probe
+if test "$linux_blkzoned_h" != "yes" ; then
+  linux_blkzoned_h="no"
+fi
+cat > $TMPC << EOF
+#include <linux/blkzoned.h>
+int main(int argc, char **argv)
+{
+  return 0;
+}
+EOF
+if compile_prog "" "" "linux_blkzoned_h"; then
+  linux_blkzoned_h="yes"
+fi
+print_config "<linux/blkzoned.h>" "$linux_blkzoned_h"
+if test "$linux_blkzoned_h" = "yes" ; then
+  output_sym "HAVE_LINUX_BLKZONED_H"
+fi
+
+##########################################
 # check march=armv8-a+crc+crypto
 if test "$march_armv8_a_crc_crypto" != "yes" ; then
   march_armv8_a_crc_crypto="no"
diff --git a/debug.h b/debug.h
index b8718ddc225f..c69c8079beda 100644
--- a/debug.h
+++ b/debug.h
@@ -24,6 +24,7 @@ enum {
 	FD_COMPRESS,
 	FD_STEADYSTATE,
 	FD_HELPERTHREAD,
+	FD_ZBC,
 	FD_DEBUG_MAX,
 };
 
diff --git a/file.h b/file.h
index 8fd34b136c23..42b304629824 100644
--- a/file.h
+++ b/file.h
@@ -10,6 +10,9 @@
 #include "lib/lfsr.h"
 #include "lib/gauss.h"
 
+/* Forward declarations */
+struct zoned_block_device_info;
+
 /*
  * The type of object we are working on
  */
@@ -97,6 +100,11 @@ struct fio_file {
 	uint64_t file_offset;
 	uint64_t io_size;
 
+	/*
+	 * Zoned device information
+	 */
+	struct zoned_block_device_info *zbd_info;
+
 	/*
 	 * Track last end and last start of IO for a given data direction
 	 */
diff --git a/filesetup.c b/filesetup.c
index 7cbce1327f8f..d981c61f5b7f 100644
--- a/filesetup.c
+++ b/filesetup.c
@@ -16,6 +16,7 @@
 #include "hash.h"
 #include "lib/axmap.h"
 #include "rwlock.h"
+#include "zbc.h"
 
 #ifdef CONFIG_LINUX_FALLOCATE
 #include <linux/falloc.h>
@@ -773,6 +774,9 @@ static int get_file_sizes(struct thread_data *td)
 		 */
 		if (f->real_file_size == -1ULL && td->o.size)
 			f->real_file_size = td->o.size / td->o.nr_files;
+
+		if (f->filetype == FIO_TYPE_BLOCK)
+			zbc_init_zone_info(f);
 	}
 
 	return err;
@@ -1165,7 +1169,9 @@ done:
 		td->done = 1;
 
 	td_restore_runstate(td, old_state);
-	return 0;
+
+	return zbc_verify_options();
+
 err_offset:
 	log_err("%s: you need to specify valid offset=\n", o->name);
 err_out:
diff --git a/init.c b/init.c
index e47e5384119b..3ea5ea57e3a1 100644
--- a/init.c
+++ b/init.c
@@ -2266,6 +2266,10 @@ const struct debug_level debug_levels[] = {
 	  .help = "Helper thread logging",
 	  .shift = FD_HELPERTHREAD,
 	},
+	{ .name = "zbc",
+	  .help = "Zoned Block Device logging",
+	  .shift = FD_ZBC,
+	},
 	{ .name = NULL, },
 };
 
diff --git a/io_u.c b/io_u.c
index 01b36938d1b5..ba4dbb9d8ecc 100644
--- a/io_u.c
+++ b/io_u.c
@@ -14,6 +14,7 @@
 #include "err.h"
 #include "lib/pow2.h"
 #include "minmax.h"
+#include "zbc.h"
 
 struct io_completion_data {
 	int nr;				/* input */
@@ -558,6 +559,9 @@ static int get_next_offset(struct thread_data *td, struct io_u *io_u,
 	if (get_next_block(td, io_u, ddir, rw_seq_hit, is_random))
 		return 1;
 
+	if (zbc_adjust_block(td, io_u))
+		return 1;
+
 	if (io_u->offset >= f->io_size) {
 		dprint(FD_IO, "get_next_offset: offset %llu >= io_size %llu\n",
 					(unsigned long long) io_u->offset,
diff --git a/ioengines.c b/ioengines.c
index 965581aa4157..a04a977cca9f 100644
--- a/ioengines.c
+++ b/ioengines.c
@@ -19,6 +19,7 @@
 
 #include "fio.h"
 #include "diskutil.h"
+#include "zbc.h"
 
 static FLIST_HEAD(engine_list);
 
@@ -320,6 +321,8 @@ int td_io_queue(struct thread_data *td, struct io_u *io_u)
 	}
 
 	ret = td->io_ops->queue(td, io_u);
+	if (ret < FIO_Q_BUSY)
+		zbc_update_wp(td, io_u);
 
 	unlock_file(td, io_u->file);
 
diff --git a/zbc.c b/zbc.c
new file mode 100644
index 000000000000..57e1981ee35d
--- /dev/null
+++ b/zbc.c
@@ -0,0 +1,604 @@
+/*
+ * Copyright (C) 2018 Western Digital Corporation or its affiliates.
+ *
+ * This file is released under the GPL.
+ */
+
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+#include <linux/blkzoned.h>
+#include "file.h"
+#include "fio.h"
+#include "log.h"
+#include "zbc.h"
+
+/* Return a malloc'ed copy of the first non-dot entry name in @dir_path. */
+static char *get_first_dirent(const char *dir_path)
+{
+	DIR *dir = opendir(dir_path);
+	struct dirent *entry;
+	char *name = NULL;
+
+	if (dir == NULL)
+		return NULL;
+
+	/* Names starting with '.' (including "." and "..") are ignored. */
+	for (entry = readdir(dir); entry != NULL; entry = readdir(dir)) {
+		if (entry->d_name[0] != '.') {
+			name = strdup(entry->d_name);
+			break;
+		}
+	}
+	closedir(dir);
+
+	return name;
+}
+
+/*
+ * Convert a block device name into a SCSI device ID, e.g. /dev/sdc into
+ * 0:0:0:0. Returns 0 and a malloc'ed *scsi_id, or a negative errno value.
+ */
+static int bdev_to_scsi(char **scsi_id, const char *bdev)
+{
+	char *bdev_path = NULL, *bdev2;
+	struct dirent *e;
+	bool matches;
+	DIR *d;
+	int res = -ENOENT;
+
+	if (strncmp(bdev, "/dev/", 5) != 0)
+		return -EINVAL;
+	d = opendir("/sys/class/scsi_device");
+	if (!d)
+		return -errno;	/* e.g. -ENOENT if there is no SCSI sysfs */
+	while ((e = readdir(d))) {
+		/* Skip "." and ".." */
+		if (e->d_name[0] == '.')
+			continue;
+		free(bdev_path);
+		bdev_path = NULL;
+		if (asprintf(&bdev_path,
+			     "/sys/class/scsi_device/%s/device/block",
+			     e->d_name) < 0) {
+			bdev_path = NULL; /* undefined after a failure */
+			res = -ENOMEM;
+			break;
+		}
+		bdev2 = get_first_dirent(bdev_path);
+		matches = bdev2 && strcmp(bdev2, bdev + 5) == 0;
+		free(bdev2);
+		if (!matches)
+			continue;
+		*scsi_id = strdup(e->d_name);
+		res = *scsi_id ? 0 : -ENOMEM;
+		break;
+	}
+	closedir(d);
+	free(bdev_path);
+
+	return res;
+}
+
+/*
+ * Get the SCSI device type from VPD page 0x80. That device type is called the
+ * "peripheral device type" in the SCSI SPC-5 standard. Returns -ENXIO if and
+ * only if the device is not a SCSI device; other failures are negative too.
+ */
+static int get_scsi_device_type(const char *bdev)
+{
+	char *scsi_id = NULL, *vpd_pg80_path = NULL;
+	uint8_t vpd_pg80[8];
+	ssize_t len;
+	int vpd_fd, ret;
+
+	ret = bdev_to_scsi(&scsi_id, bdev);
+	if (ret < 0 && ret != -ENOENT)
+		return ret;
+	if (scsi_id == NULL)
+		return -ENXIO;
+	dprint(FD_ZBC, "Block device %s has SCSI ID %s\n", bdev, scsi_id);
+	ret = -ENOMEM;
+	if (asprintf(&vpd_pg80_path,
+		     "/sys/class/scsi_device/%s/device/vpd_pg80", scsi_id) < 0) {
+		vpd_pg80_path = NULL;	/* undefined after a failure */
+		goto out;
+	}
+	vpd_fd = open(vpd_pg80_path, O_RDONLY);
+	ret = vpd_fd < 0 ? -errno : 0;
+	if (ret < 0)
+		goto out;
+	len = read(vpd_fd, vpd_pg80, sizeof(vpd_pg80));
+	ret = len < 0 ? -errno : -EIO;	/* -EIO covers a short read */
+	close(vpd_fd);
+	if (len == (ssize_t)sizeof(vpd_pg80))
+		ret = vpd_pg80[0] & 0x1f;
+out:
+	free(vpd_pg80_path);
+	free(scsi_id);
+	return ret;
+}
+
+/**
+ * zbc_reset_zone - reset the write pointer of one or more zones
+ * @f: FIO file associated with the disk for which to reset write pointers
+ * @sector: First sector for which to reset the write pointer in units of 512
+ *	bytes.
+ * @nr_sectors: Number of sectors to reset the write pointer of.
+ *
+ * Returns the BLKRESETZONE ioctl() result: negative (errno set) on failure.
+ */
+static int zbc_reset_zone(const struct fio_file *f, uint64_t sector,
+			  uint64_t nr_sectors)
+{
+	struct blk_zone_range zr = { .sector = sector, .nr_sectors = nr_sectors };
+	int ret = ioctl(f->fd, BLKRESETZONE, &zr);
+
+	/* Cast explicitly: uint64_t is not unsigned long on every platform. */
+	if (ret < 0)
+		log_err("%s: resetting wp for %llu sectors at sector %llu failed (%d).\n",
+			f->file_name, (unsigned long long) nr_sectors,
+			(unsigned long long) sector, errno);
+	return ret;
+}
+
+/*
+ * Issue BLKREPORTZONE on @fd for the zones starting at sector @start_sector.
+ * The report header is built at the start of @buf, which is @bufsz bytes
+ * large. Returns -EINVAL if @buf cannot hold the header, else the ioctl result.
+ */
+static int read_zone_info(int fd, uint64_t start_sector,
+			  void *buf, unsigned int bufsz)
+{
+	const unsigned int hdr_sz = sizeof(struct blk_zone_report);
+	struct blk_zone_report *report = buf;
+
+	if (bufsz < hdr_sz)
+		return -EINVAL;
+
+	memset(report, 0, hdr_sz);
+	report->sector = start_sector;
+	report->nr_zones = (bufsz - hdr_sz) / sizeof(struct blk_zone);
+	return ioctl(fd, BLKREPORTZONE, report);
+}
+
+/*
+ * Initialize f->zbd_info; returns 0 (also for non-ZBC files) or < 0 on error.
+ */
+int zbc_init_zone_info(struct fio_file *f)
+{
+	const unsigned int bufsz = sizeof(struct blk_zone_report) +
+		32768 * sizeof(struct blk_zone);
+	unsigned int nr_zones, i, j;
+	struct blk_zone_report *hdr;
+	const struct blk_zone *z;
+	struct fio_zone_info *p;
+	uint64_t zone_size, start_sector;
+	struct zoned_block_device_info *zbd_info = NULL;
+	int fd, ret;
+
+	ret = get_scsi_device_type(f->file_name);
+	if (ret == -ENXIO) {
+		dprint(FD_ZBC, "%s: not a SCSI device\n", f->file_name);
+		ret = 0;
+		goto out;
+	}
+	if (ret < 0)
+		log_info("fio: unable to determine device type for %s.\n",
+			 f->file_name);
+
+	dprint(FD_ZBC, "Block device %s has SCSI device type %#x\n",
+	       f->file_name, ret);
+
+	if (ret != 0x14 /* ZBC */) {
+		ret = 0;
+		goto out;
+	}
+
+	dprint(FD_ZBC, "Reading zone information for device %s\n",
+	       f->file_name);
+
+	ret = -ENOMEM;	/* ret held the device type (0x14) until here */
+	hdr = malloc(bufsz);
+	if (!hdr)
+		goto out;
+
+	fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
+	if (fd < 0) {
+		ret = -errno;
+		goto free;
+	}
+
+	if (read_zone_info(fd, 0, hdr, bufsz) < 0) {
+		ret = -errno;
+		log_info("fio: BLKREPORTZONE(%lu) failed for %s (%d).\n",
+			 0UL, f->file_name, -ret);
+		goto close;
+	}
+	z = (void *)(hdr + 1);
+	if (hdr->nr_zones < 1 || z->len == 0) {	/* len is a divisor below */
+		log_info("fio: %s has invalid zone information.\n",
+			 f->file_name);
+		ret = -EINVAL;
+		goto close;
+	}
+	zone_size = z->len;
+	nr_zones = (f->real_file_size >> 9) / zone_size;
+	dprint(FD_ZBC, "Device %s has %u zones of size %llu\n", f->file_name,
+	       nr_zones, (unsigned long long) zone_size);
+
+	zbd_info = calloc(1, sizeof(*zbd_info) +
+			  (nr_zones + 1) * sizeof(zbd_info->zone_info[0]));
+	if (!zbd_info)
+		goto close;	/* ret is still -ENOMEM */
+	p = &zbd_info->zone_info[0];
+	for (start_sector = 0, j = 0; j < nr_zones;) {
+		z = (void *)(hdr + 1);
+		for (i = 0; i < hdr->nr_zones && j <= nr_zones;
+		     i++, j++, z++, p++) {	/* stay within zone_info[] */
+			p->start = z->start;
+			p->wp = z->wp;
+			p->type = z->type;
+			if (j > 0 && p->start != p[-1].start + zone_size) {
+				log_info("%s: invalid zone data\n",
+					 f->file_name);
+				ret = -EINVAL;
+				goto close;
+			}
+		}
+		z--;
+		start_sector = z->start + z->len;
+		if (j >= nr_zones)
+			break;
+		ret = read_zone_info(fd, start_sector, hdr, bufsz);
+		if (ret < 0 || hdr->nr_zones < 1) {
+			ret = ret < 0 ? -errno : -EINVAL; /* empty report */
+			log_info("fio: BLKREPORTZONE(%llu) failed for %s (%d).\n",
+				 (unsigned long long) start_sector,
+				 f->file_name, -ret);
+			goto close;
+		}
+	}
+	/* a sentinel */
+	zbd_info->zone_info[nr_zones].start = start_sector;
+	f->zbd_info = zbd_info;
+	f->zbd_info->zone_size = zone_size;
+	f->zbd_info->nr_zones = nr_zones;
+	zbd_info = NULL;
+	ret = 0;
+
+close:
+	free(zbd_info);
+	close(fd);
+free:
+	free(hdr);
+out:
+	return ret;
+}
+
+/**
+ * zbc_zone_idx - map a byte offset onto the index of the zone containing it
+ * @td: thread data (not used).
+ * @f: file whose zone information is consulted.
+ * @offset: offset in bytes; the disk size maps onto the sentinel's index.
+ */
+static uint32_t zbc_zone_idx(const struct thread_data *td,
+			     const struct fio_file *f, uint64_t offset)
+{
+	const uint64_t sector = offset >> 9;
+	const uint32_t idx = sector / f->zbd_info->zone_size;
+
+	assert(f->real_file_size >= offset);
+	assert(f->zbd_info->nr_zones >= idx);
+	return idx;
+}
+
+static bool zone_full(const struct fio_file *f, const struct fio_zone_info *z)
+{
+	return !(z->wp < z->start + f->zbd_info->zone_size); /* wp at/past end */
+}
+
+static bool is_valid_offset(const struct fio_file *f, uint64_t offset)
+{
+	return offset >= f->file_offset && offset - f->file_offset < f->io_size;
+}
+
+/* Return false if any job that has a ZBC file does not use direct I/O. */
+static bool zbc_using_direct_io(void)
+{
+	struct thread_data *job;
+	struct fio_file *file;
+	int t, n;
+
+	for_each_td(job, t) {
+		/* Jobs that use direct I/O can be skipped entirely. */
+		if (!job->o.odirect) {
+			for_each_file(job, file, n) {
+				if (file->zbd_info != NULL)
+					return false;
+			}
+		}
+	}
+	return true;
+}
+
+static bool other_job_writes_to(struct thread_data *const td,
+				struct fio_file *const f,
+				const int i, const int j)
+{
+	struct thread_data *td2;
+	struct fio_file *f2;
+	int k, m;
+
+	/* Scan all writing/trimming jobs for a ZBC file with @f's name. */
+	for_each_td(td2, k) {
+		/* Test td2, the job being scanned, not the caller's @td. */
+		if ((td2->o.td_ddir & (TD_DDIR_WRITE | TD_DDIR_TRIM)) == 0)
+			continue;
+		for_each_file(td2, f2, m) {
+			if ((k != i || m != j) && f2->zbd_info &&
+			    strcmp(f->file_name, f2->file_name) == 0)
+				return true;
+		}
+	}
+
+	return false;
+}
+
+/*
+ * Report whether a ZBC file of a writing or trimming job is also used by
+ * another job, as determined by other_job_writes_to().
+ *
+ * To do: refine this code such that locking is only required if multiple
+ * ZBC write or trim jobs have been specified for the same drive.
+ */
+static bool zbc_multiple_writers(void)
+{
+	struct thread_data *job;
+	struct fio_file *file;
+	int t, n;
+
+	for_each_td(job, t) {
+		if (!(job->o.td_ddir & (TD_DDIR_WRITE | TD_DDIR_TRIM)))
+			continue;
+		for_each_file(job, file, n) {
+			if (file->zbd_info != NULL &&
+			    other_job_writes_to(job, file, t, n))
+				return true;
+		}
+	}
+	return false;
+}
+
+static bool zbc_verify_sizes(void)
+{
+	const struct fio_zone_info *z;
+	struct thread_data *td;
+	struct fio_file *f;
+	uint64_t new_offset, new_end;
+	int i, j;
+
+	/* Shrink the I/O range of each ZBC file to whole zones. */
+	for_each_td(td, i) {
+		for_each_file(td, f, j) {
+			if (!f->zbd_info)
+				continue;
+			z = &f->zbd_info->zone_info[zbc_zone_idx(td, f,
+							f->file_offset)];
+			new_offset = (z->start + f->zbd_info->zone_size) << 9;
+			if ((z->start << 9) != f->file_offset) {
+				if (new_offset >= f->file_offset + f->io_size)
+					goto too_small;	/* would underflow */
+				log_info("%s: rounded up offset from %llu to %llu\n",
+					 f->file_name,
+					 (unsigned long long) f->file_offset,
+					 (unsigned long long) new_offset);
+				f->io_size -= (new_offset - f->file_offset);
+				f->file_offset = new_offset;
+			}
+			new_end = f->zbd_info->zone_info[zbc_zone_idx(td, f,
+					f->file_offset + f->io_size)].start << 9;
+			if (f->file_offset + f->io_size == new_end)
+				continue;
+			if (new_end <= f->file_offset)
+				goto too_small;
+			log_info("%s: rounded down io_size from %llu to %llu\n",
+				 f->file_name, (unsigned long long) f->io_size,
+				 (unsigned long long) (new_end - f->file_offset));
+			f->io_size = new_end - f->file_offset; /* a length */
+		}
+	}
+	return true;
+too_small:
+	log_info("%s: io_size must be at least one zone\n", f->file_name);
+	return false;
+}
+
+int zbc_verify_options(void)
+{
+	int failed = 1;
+
+	/*
+	 * Run the ZBC checks in order; return 0 if all pass and 1 otherwise.
+	 */
+	if (!zbc_using_direct_io()) {
+		log_err("Using direct I/O is mandatory for ZBC drives\n\n");
+	} else if (zbc_multiple_writers()) {
+		log_err("Concurrent writing to ZBC disks is not supported\n\n");
+	} else if (zbc_verify_sizes()) {
+		failed = 0;
+	}
+
+	return failed;
+}
+
+/**
+ * zbc_adjust_block - adjust the offset and length as necessary for ZBC drives
+ * @td: FIO thread data.
+ * @io_u: FIO I/O unit.
+ *
+ * Returns 0 if the I/O unit should be used and 1 if not.
+ */
+int zbc_adjust_block(const struct thread_data *td, struct io_u *io_u)
+{
+	const struct fio_file *f = io_u->file;
+	uint32_t zone_idx_b, zone_idx_e;
+	struct fio_zone_info *zb, *ze;
+	uint64_t orig_o = io_u->offset, offset = f->file_offset + orig_o;
+	uint32_t orig_len = io_u->buflen;
+	uint64_t delta, zone_bytes, end;
+
+	if (!f->zbd_info)
+		return 0;
+
+	zone_idx_b = zbc_zone_idx(td, f, offset);
+	zone_idx_e = zbc_zone_idx(td, f, offset +
+				  (io_u->buflen ? io_u->buflen - 1 : 0));
+	zb = &f->zbd_info->zone_info[zone_idx_b];
+	ze = &f->zbd_info->zone_info[zone_idx_e];
+	zone_bytes = f->zbd_info->zone_size << 9;	/* sectors -> bytes */
+
+	switch (io_u->ddir) {
+	case DDIR_READ:
+		/*
+		 * From the ZBC spec: a read operation past the write pointer
+		 * of a zone shall return logical block data set to the last
+		 * initialization pattern that was set at manufacture time,
+		 * by the FORMAT UNIT command or by the most recent SANITIZE
+		 * command with the service action set to OVERWRITE. Hence,
+		 * for random I/O, do not read past the write pointer.
+		 */
+		if (!td_random(td))
+			return 0;
+		if (io_u->buflen > ((zb->wp - zb->start) << 9))
+			return 1;
+		/* io_u->offset is relative to f->file_offset; wp is not. */
+		if (offset + io_u->buflen > (zb->wp << 9)) {
+			offset = (zb->wp << 9) - io_u->buflen;
+			io_u->offset = offset - f->file_offset;
+			dprint(FD_IO,
+			       "changed read offset from %ld into %lld\n",
+			       orig_o, io_u->offset);
+		}
+		return 0;
+	case DDIR_WRITE:
+		if (zb->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
+			return 0;
+		/* Make writes occur at the write pointer */
+		if (zone_full(f, zb)) {
+			if (zbc_reset_zone(f, zb->start,
+					   f->zbd_info->zone_size) < 0)
+				return 1;
+			zb->wp = zb->start;
+			dprint(FD_IO,
+			       "reset zone write pointer at offset %ld\n",
+			       zb->start << 9);
+		}
+		offset = zb->wp << 9;
+		if (!is_valid_offset(f, offset))
+			return 1;
+		io_u->offset = offset - f->file_offset;
+		if (orig_o != io_u->offset)
+			dprint(FD_IO,
+			       "changed write offset from %ld into %lld\n",
+			       orig_o, io_u->offset);
+		/* Shrink (never grow) writes that cross the end of the zone. */
+		end = (zb->start << 9) + zone_bytes;
+		if (offset + io_u->buflen > end) {
+			io_u->buflen = end - offset;
+			dprint(FD_IO, "Changed length from %u into %lu\n",
+			       orig_len, io_u->buflen);
+		}
+		return 0;
+	case DDIR_TRIM:
+		/* Align trims to zone boundaries; fully covered zones stay. */
+		end = offset + io_u->buflen;
+		if (zone_idx_b == zone_idx_e) {
+			if (zb->type == BLK_ZONE_TYPE_SEQWRITE_REQ &&
+			    io_u->buflen < zone_bytes)
+				io_u->buflen = 0;
+		} else {
+			if (zb->type == BLK_ZONE_TYPE_SEQWRITE_REQ) {
+				delta = (zone_bytes - (offset -
+					 (zb->start << 9))) % zone_bytes;
+				io_u->offset += delta;
+				io_u->buflen -= delta;
+			}
+			if (ze->type == BLK_ZONE_TYPE_SEQWRITE_REQ)
+				io_u->buflen -= (end - (ze->start << 9)) %
+					zone_bytes;
+		}
+		dprint(FD_IO, "Changed trim range from %lu + %u into %llu + %lu (adjustments: offset + %llu; len - %lu)\n",
+		       orig_o, orig_len, io_u->offset, io_u->buflen,
+		       io_u->offset - orig_o, orig_len - io_u->buflen);
+		return 0;
+	case DDIR_SYNC:
+	case DDIR_DATASYNC:
+	case DDIR_SYNC_FILE_RANGE:
+	case DDIR_WAIT:
+	case DDIR_LAST:
+	case DDIR_INVAL:
+		return 0;
+	}
+
+	assert(false);
+	return 1;
+}
+
+/**
+ * zbc_update_wp - update the cached write pointer of the affected zones
+ * @td: thread data
+ * @io_u: I/O unit; nothing happens unless it writes or trims a zoned file
+ *
+ * A write sets the write pointer of every sequential-write-required zone it
+ * touches to the end of the I/O; a trim sets it back to the zone start.
+ */
+void zbc_update_wp(struct thread_data *td, const struct io_u *io_u)
+{
+	struct zoned_block_device_info *zbd_info;
+	struct fio_zone_info *z;
+	uint32_t zone_idx;
+	uint64_t end;
+
+	if (!io_u->file->zbd_info)
+		return;
+
+	switch (io_u->ddir) {
+	case DDIR_READ:
+	case DDIR_SYNC:
+	case DDIR_DATASYNC:
+	case DDIR_SYNC_FILE_RANGE:
+	case DDIR_WAIT:
+	case DDIR_LAST:
+	case DDIR_INVAL:
+		return;
+	case DDIR_WRITE:
+	case DDIR_TRIM:
+		break;
+	}
+
+	zbd_info = io_u->file->zbd_info;
+	zone_idx = zbc_zone_idx(td, io_u->file, io_u->offset);
+	end = (io_u->offset + io_u->buflen) >> 9;	/* in 512-byte sectors */
+	for (z = &zbd_info->zone_info[zone_idx]; z->start < end;
+	     z++, zone_idx++) {
+		assert(zone_idx < zbd_info->nr_zones);
+		if (z->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
+			continue;
+		switch (io_u->ddir) {
+		case DDIR_WRITE:
+			z->wp = end;
+			break;
+		case DDIR_TRIM:
+			z->wp = z->start;
+			break;
+		default:
+			assert(false);
+			break;
+		}
+	}
+}
diff --git a/zbc.h b/zbc.h
new file mode 100644
index 000000000000..de2a39551ca8
--- /dev/null
+++ b/zbc.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2018 Western Digital Corporation or its affiliates.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef FIO_ZBC_H
+#define FIO_ZBC_H
+
+#include <inttypes.h>
+
+struct fio_file;
+
+/**
+ * struct fio_zone_info - information about a single ZBC zone
+ * @start: zone start in 512 byte units
+ * @wp: zone write pointer location in 512 byte units
+ * @type: zone type as defined by enum blk_zone_type
+ */
+struct fio_zone_info {
+	uint64_t	start;
+	uint64_t	wp;
+	uint8_t		type;
+};
+
+/**
+ * zoned_block_device_info - zoned block device characteristics
+ * @zone_size: size of a single zone in units of 512 bytes
+ * @nr_zones: number of zones
+ * @zone_info: the @nr_zones zone descriptions followed by a sentinel entry
+ *
+ * Only devices for which all zones have the same size are supported.
+ * Note: if the capacity is not a multiple of the zone size then the last zone
+ * will be smaller than 'zone_size'.
+ */
+struct zoned_block_device_info {
+	uint64_t		zone_size;
+	uint64_t		nr_zones;
+	struct fio_zone_info	zone_info[];
+};
+
+#ifdef HAVE_LINUX_BLKZONED_H
+int zbc_init_zone_info(struct fio_file *f);
+int zbc_verify_options(void);
+int zbc_adjust_block(const struct thread_data *td, struct io_u *io_u);
+void zbc_update_wp(struct thread_data *td, const struct io_u *io_u);
+#else
+static inline int zbc_init_zone_info(struct fio_file *f)
+{
+	return 0;
+}
+
+static inline int zbc_verify_options(void)
+{
+	return 0;
+}
+
+static inline int zbc_adjust_block(const struct thread_data *td, struct io_u *io_u)
+{
+	return 0;	/* same signature as the real implementation */
+}
+
+static inline void zbc_update_wp(struct thread_data *td,
+				 const struct io_u *io_u)
+{
+}
+#endif
+
+#endif /* FIO_ZBC_H */

next prev parent reply	other threads:[~2018-03-15 16:43 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-06 18:59 ZBC/FLEX FIO addition ideas Phillip Chen
2018-03-15 16:15 ` Kris Davis
2018-03-15 16:30   ` Sitsofe Wheeler
2018-03-15 16:43     ` Bart Van Assche [this message]
2018-03-15 18:06       ` Phillip Chen
2018-03-15 18:38         ` Bart Van Assche
2018-03-15 19:40           ` Phillip Chen
2018-03-15 21:01             ` Bart Van Assche
2018-03-17  7:55       ` Sitsofe Wheeler
2018-03-20  2:21         ` Bart Van Assche
2018-03-23 17:30           ` Phillip Chen
2018-03-23 17:35             ` Bart Van Assche

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:8f4871c6352 dfblob:44dd7f3439f dfblob:4442a1cc0d7
dfblob:688e8620132 dfblob:b8718ddc225 dfblob:c69c8079bed
dfblob:8fd34b136c2 dfblob:42b30462982 dfblob:7cbce1327f8
dfblob:d981c61f5b7 dfblob:e47e5384119 dfblob:3ea5ea57e3a
dfblob:01b36938d1b dfblob:ba4dbb9d8ec dfblob:965581aa415
dfblob:a04a977cca9 dfblob:57e1981ee35 dfblob:de2a39551ca )
 OR (
bs:"Re: ZBC/FLEX FIO addition ideas" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1521132231.2834.4.camel@wdc.com \
    --to=bart.vanassche@wdc.com \
    --cc=Kris.Davis@wdc.com \
    --cc=fio@vger.kernel.org \
    --cc=jason.jorgensen@wdc.com \
    --cc=phillip.a.chen@seagate.com \
    --cc=sitsofe@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox