linux-scsi.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] st: implement sysfs based tape statistics v2
@ 2015-01-13  3:43 Seymour, Shane M
  2015-01-27  0:11 ` Seymour, Shane M
                   ` (2 more replies)
  0 siblings, 3 replies; 17+ messages in thread
From: Seymour, Shane M @ 2015-01-13  3:43 UTC (permalink / raw)
  To: linux-scsi@vger.kernel.org
  Cc: Kai.Makisara@kolumbus.fi,
	James E.J. Bottomley (JBottomley@parallels.com), jeffm@suse.com

Some small changes since the last version - this version removes two files from sysfs compared to the last version (read and write block counts since they're derived from the byte counts they can be calculated in user space) but that's the only change. This version has been rebased to 3.19.0-rc3-next-20150108.

Since posting the last version an email was received by Kai and myself from an AT&T employee who has a need for tape statistics to be implemented (who gave permission to quote their email), I've included part of the email here:

"There are over 20,000 tape devices managed by our operations group zoned to tens of thousands of servers.

My challenge is that I cannot provide operations a solution that gives them visibility into the tape drive performance metrics when that platform is linux. Our legacy platforms (Solaris,HPUX,AIX) provide facilities to use iostat and sar to determine the write speed of the tape drives. We took for granted that this would be available in linux and its absence has been very troublesome. Because operations cannot measure tape drive performance in this way they cannot easily determine when there is a tape drive performance problem and whether the change improved or worsened the performance problem.
...
I have followed the debate https://lkml.org/lkml/2013/3/20/696 and from a service provide perspective we would expect the same maturity and functionality that we have from the traditional unix platform in regards to iostat/sar. This issue is important and urgent because tape drive performance issues are common and I am unable to provide standards and processes to identify and remediate these issues."

Another HP customer has also requested the same functionality (but hasn't given permission to be named), they own tape drives numbering in the 1000s and also need the ability to investigate performance issues.

Signed-off-by: shane.seymour@hp.com
Tested-by: shane.seymour@hp.com
---
diff -uprN a/drivers/scsi/st.c b/drivers/scsi/st.c
--- a/drivers/scsi/st.c	2015-01-11 14:46:00.243814755 -0600
+++ b/drivers/scsi/st.c	2015-01-12 13:54:52.549117333 -0600
@@ -20,6 +20,7 @@
 static const char *verstr = "20101219";
 
 #include <linux/module.h>
+#include <linux/kobject.h>
 
 #include <linux/fs.h>
 #include <linux/kernel.h>
@@ -226,6 +227,20 @@ static DEFINE_SPINLOCK(st_index_lock);
 static DEFINE_SPINLOCK(st_use_lock);
 static DEFINE_IDR(st_index_idr);
 
+static inline void st_stats_remove_files(struct scsi_tape *);
+static inline void st_stats_create_files(struct scsi_tape *);
+static ssize_t st_tape_attr_show(struct kobject *, struct attribute *, char *);
+static ssize_t st_tape_attr_store(struct kobject *, struct attribute *,
+	const char *, size_t);
+static void st_release_stats_kobj(struct kobject *);
+static const struct sysfs_ops st_stats_sysfs_ops = {
+	.show   = st_tape_attr_show,
+	.store  = st_tape_attr_store,
+};
+static struct kobj_type st_stats_ktype = {
+	.release = st_release_stats_kobj,
+	.sysfs_ops = &st_stats_sysfs_ops,
+};
 
 

 #include "osst_detect.h"
@@ -476,10 +491,22 @@ static void st_scsi_execute_end(struct r
 	struct st_request *SRpnt = req->end_io_data;
 	struct scsi_tape *STp = SRpnt->stp;
 	struct bio *tmp;
+	u64 ticks;
 
 	STp->buffer->cmdstat.midlevel_result = SRpnt->result = req->errors;
 	STp->buffer->cmdstat.residual = req->resid_len;
 
+	if (STp->stats != NULL) {
+		ticks = get_jiffies_64();
+		STp->stats->in_flight--;
+		ticks -= STp->stats->stamp;
+		STp->stats->io_ticks += ticks;
+		if (req->cmd[0] == WRITE_6)
+			STp->stats->write_ticks += ticks;
+		else if (req->cmd[0] == READ_6)
+			STp->stats->read_ticks += ticks;
+	}
+
 	tmp = SRpnt->bio;
 	if (SRpnt->waiting)
 		complete(SRpnt->waiting);
@@ -496,6 +523,7 @@ static int st_scsi_execute(struct st_req
 	struct rq_map_data *mdata = &SRpnt->stp->buffer->map_data;
 	int err = 0;
 	int write = (data_direction == DMA_TO_DEVICE);
+	struct scsi_tape *STp = SRpnt->stp;
 
 	req = blk_get_request(SRpnt->stp->device->request_queue, write,
 			      GFP_KERNEL);
@@ -516,6 +544,20 @@ static int st_scsi_execute(struct st_req
 		}
 	}
 
+	if (STp->stats != NULL) {
+		if (cmd[0] == WRITE_6) {
+			STp->stats->write_cnt++;
+			STp->stats->write_byte_cnt += bufflen;
+		} else if (cmd[0] == READ_6) {
+			STp->stats->read_cnt++;
+			STp->stats->read_byte_cnt += bufflen;
+		} else {
+			STp->stats->other_cnt++;
+		}
+		STp->stats->stamp = get_jiffies_64();
+		STp->stats->in_flight++;
+	}
+
 	SRpnt->bio = req->bio;
 	req->cmd_len = COMMAND_SIZE(cmd[0]);
 	memset(req->cmd, 0, BLK_MAX_CDB);
@@ -4064,6 +4106,8 @@ out:
 static int create_cdevs(struct scsi_tape *tape)
 {
 	int mode, error;
+	struct scsi_tape_stats *tmp;
+
 	for (mode = 0; mode < ST_NBR_MODES; ++mode) {
 		error = create_one_cdev(tape, mode, 0);
 		if (error)
@@ -4072,6 +4116,26 @@ static int create_cdevs(struct scsi_tape
 		if (error)
 			return error;
 	}
+/* Create statistics directory under device, if it fails we dont
+   have statistics. */
+	if (tape->stats != NULL) {
+		kobject_init(&tape->stats->statistics, &st_stats_ktype);
+		error = kobject_add(&tape->stats->statistics,
+			&tape->device->sdev_gendev.kobj,
+			"statistics");
+		if (error) {
+			kobject_del(&tape->stats->statistics);
+			tmp = tape->stats;
+			tape->stats = NULL;
+			kfree(tmp);
+		} else {
+			st_stats_create_files(tape);
+		}
+	} else {
+		tmp = tape->stats;
+		tape->stats = NULL;
+		kfree(tmp);
+	}
 
 	return sysfs_create_link(&tape->device->sdev_gendev.kobj,
 				 &tape->modes[0].devs[0]->kobj, "tape");
@@ -4081,6 +4145,10 @@ static void remove_cdevs(struct scsi_tap
 {
 	int mode, rew;
 	sysfs_remove_link(&tape->device->sdev_gendev.kobj, "tape");
+	if (tape->stats != NULL) {
+		st_stats_remove_files(tape);
+		kobject_del(&tape->stats->statistics);
+	}
 	for (mode = 0; mode < ST_NBR_MODES; mode++) {
 		struct st_modedef *STm = &(tape->modes[mode]);
 		for (rew = 0; rew < 2; rew++) {
@@ -4222,6 +4290,8 @@ static int st_probe(struct device *dev)
 	}
 	tpnt->index = error;
 	sprintf(disk->disk_name, "st%d", tpnt->index);
+/* Allocate stats structure */
+	tpnt->stats = kzalloc(sizeof(struct scsi_tape_stats), GFP_ATOMIC);
 
 	dev_set_drvdata(dev, tpnt);
 
@@ -4241,6 +4311,7 @@ static int st_probe(struct device *dev)
 
 out_remove_devs:
 	remove_cdevs(tpnt);
+	dev_set_drvdata(dev, NULL);
 	spin_lock(&st_index_lock);
 	idr_remove(&st_index_idr, tpnt->index);
 	spin_unlock(&st_index_lock);
@@ -4248,6 +4319,8 @@ out_put_queue:
 	blk_put_queue(disk->queue);
 out_put_disk:
 	put_disk(disk);
+	if (tpnt->stats != NULL)
+		kfree(tpnt->stats);
 	kfree(tpnt);
 out_buffer_free:
 	kfree(buffer);
@@ -4288,6 +4361,7 @@ static void scsi_tape_release(struct kre
 	struct scsi_tape *tpnt = to_scsi_tape(kref);
 	struct gendisk *disk = tpnt->disk;
 
+	dev_set_drvdata(&tpnt->device->sdev_gendev, NULL);
 	tpnt->device = NULL;
 
 	if (tpnt->buffer) {
@@ -4298,6 +4372,10 @@ static void scsi_tape_release(struct kre
 
 	disk->private_data = NULL;
 	put_disk(disk);
+	if (tpnt->stats != NULL) {
+		kfree(tpnt->stats);
+		tpnt->stats = NULL;
+	}
 	kfree(tpnt);
 	return;
 }
@@ -4523,6 +4601,412 @@ static struct attribute *st_dev_attrs[]
 };
 ATTRIBUTE_GROUPS(st_dev);
 
+/* Support for tape stats */
+
+struct tape_stats_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct scsi_tape *, char *);
+	ssize_t (*store)(struct scsi_tape *, const char *, size_t);
+};
+
+#define TAPE_STATS_ATTR(_name, _mode, _show, _store)		\
+const struct tape_stats_attr tape_stats_attr_##_name = {	\
+	.attr = {.name  = __stringify(_name), .mode   = _mode },\
+	.show   = _show,					\
+	.store  = _store,					\
+}
+
+void st_release_stats_kobj(struct kobject *dummy)
+{
+}
+
+/**
+ * st_stats_attr_show_read_cnt - return read count - count of reads made
+ * from tape drive
+ * @st: scsi_tape structure.
+ * @buf: buffer to return formatted data in
+ */
+static ssize_t st_stats_attr_show_read_cnt(struct scsi_tape *st, char *buf)
+{
+	if (st->stats->sync == 0)
+		return snprintf(buf, 4096, "%llu", st->stats->read_cnt);
+	return snprintf(buf, 4096, "%llu", st->stats->sync_read_cnt);
+}
+
+/**
+ * st_stats_attr_show_read_byte_cnt - return read byte count - tape drives
+ * may use blocks less than 512 bytes this gives the raw byte count of
+ * of data read from the tape drive.
+ * @st: scsi_tape structure.
+ * @buf: buffer to return formatted data in
+ */
+static ssize_t st_stats_attr_show_read_byte_cnt(struct scsi_tape *st, char *buf)
+{
+	if (st->stats->sync == 0)
+		return snprintf(buf, 4096, "%llu", st->stats->read_byte_cnt);
+	return snprintf(buf, 4096, "%llu", st->stats->sync_read_byte_cnt);
+}
+
+/**
+ * st_stats_attr_show_read_ms - return read ms - overall time spent waiting
+ * on reads in ms.
+ * @st: scsi_tape structure.
+ * @buf: buffer to return formatted data in
+ */
+static ssize_t st_stats_attr_show_read_ms(struct scsi_tape *st, char *buf)
+{
+	if (st->stats->sync == 0)
+		return snprintf(buf, 4096, "%u",
+			jiffies_to_msecs(st->stats->read_ticks));
+	return snprintf(buf, 4096, "%u",
+		jiffies_to_msecs(st->stats->sync_read_ticks));
+}
+
+/**
+ * st_stats_attr_show_write_cnt - write count - number of user calls
+ * to write(2) that have written data to tape.
+ * @st: scsi_tape structure.
+ * @buf: buffer to return formatted data in
+ */
+static ssize_t st_stats_attr_show_write_cnt(struct scsi_tape *st, char *buf)
+{
+	if (st->stats->sync == 0)
+		return snprintf(buf, 4096, "%llu", st->stats->write_cnt);
+	return snprintf(buf, 4096, "%llu", st->stats->sync_write_cnt);
+}
+
+/**
+ * st_stats_attr_show_write_byte_cnt - write byte count - raw count of
+ * bytes written to tape.
+ * @st: scsi_tape structure.
+ * @buf: buffer to return formatted data in
+ */
+static ssize_t st_stats_attr_show_write_byte_cnt(struct scsi_tape *st,
+	char *buf)
+{
+	if (st->stats->sync == 0)
+		return snprintf(buf, 4096, "%llu", st->stats->write_byte_cnt);
+	return snprintf(buf, 4096, "%llu", st->stats->sync_write_byte_cnt);
+}
+
+/**
+ * st_stats_attr_show_write_ms - write ms - number of milliseconds since
+ * last open waiting on write requests to complete.
+ * @st: scsi_tape structure.
+ * @buf: buffer to return formatted data in
+ */
+static ssize_t st_stats_attr_show_write_ms(struct scsi_tape *st, char *buf)
+{
+	if (st->stats->sync == 0)
+		return snprintf(buf, 4096, "%u",
+			jiffies_to_msecs(st->stats->write_ticks));
+	return snprintf(buf, 4096, "%u",
+		jiffies_to_msecs(st->stats->sync_write_ticks));
+}
+
+/**
+ * st_stats_attr_show_in_flight - number of I/Os currently in flight -
+ * in most cases this will be either 0 or 1. It may be higher if someone
+ * has also issued other SCSI commands such as via an ioctl.
+ * @st: scsi_tape structure.
+ * @buf: buffer to return formatted data in
+ */
+static ssize_t st_stats_attr_show_in_flight(struct scsi_tape *st, char *buf)
+{
+	if (st->stats->sync == 0)
+		return snprintf(buf, 4096, "%llu", st->stats->in_flight);
+	return snprintf(buf, 4096, "%llu", st->stats->sync_in_flight);
+}
+
+/**
+ * st_stats_attr_show_io_ms - io wait ms - this is the number of ms spent
+ * waiting on other I/O to complete. This includes tape movement commands
+ * such as rewinding, seeking to end of file or tape, etc. Except in
+ * complex tape management programs these will be indirect commands issued
+ * by the driver - e.g. rewind on close.
+ * @st: scsi_tape structure.
+ * @buf: buffer to return formatted data in
+ */
+static ssize_t st_stats_attr_show_io_ms(struct scsi_tape *st, char *buf)
+{
+	if (st->stats->sync == 0)
+		return snprintf(buf, 4096, "%u",
+			jiffies_to_msecs(st->stats->io_ticks));
+	return snprintf(buf, 4096, "%u",
+		jiffies_to_msecs(st->stats->sync_io_ticks));
+}
+
+/**
+ * st_stats_attr_show_other_cnt - other io count - this is the number of
+ * I/O requests that make up the time returned from st_stats_attr_show_io_ms.
+ * Typically these are tape movement requests but will include driver
+ * tape movement. This includes on requests seen by the st driver.
+ * @st: scsi_tape structure.
+ * @buf: buffer to return formatted data in
+ */
+static ssize_t st_stats_attr_show_other_cnt(struct scsi_tape *st, char *buf)
+{
+	if (st->stats->sync == 0)
+		return snprintf(buf, 4096, "%llu", st->stats->other_cnt);
+	return snprintf(buf, 4096, "%llu", st->stats->sync_other_cnt);
+}
+
+/**
+ * st_stats_attr_show_sync - if 0 is returned the stats are being read
+ * process holding the stats in sync. The value of the sync file should not
+ * be non-zero for long. You should be making sure that it is left non-zero
+ * for the shortest possible time.
+ * If you do not care about them being in sync you need to take some
+ * action if the sync value is non-zero as someone could have left it
+ * non-zero which means the values never change. You should write a "0"
+ * to the sync file before you read the other stats files if you have
+ * permission to so.
+ * @st: scsi_tape structure.
+ * @buf: buffer to return formatted data in
+ */
+static ssize_t st_stats_attr_show_sync(struct scsi_tape *st, char *buf)
+{
+	return snprintf(buf, 4096, "%llu", st->stats->sync);
+}
+
+/**
+ * st_stats_attr_store_sync - store the sync value this causes the current
+ * stats to be saved to their save variables and values are then read from
+ * them to get a consistent set of values. You must read back the sync value
+ * after you have read all the values required and check if it has changed
+ * and decide if you will resync and reread the values if it has been changed.
+ * Any program writing a sync value must set it back to 0 once they are done.
+ * You should write your current pid into the sync so you can determine if
+ * someone else is changing the sync value. There is still a chance for a race
+ * but the sync file is the only method I could think of when writing this to
+ * get a best approximation of a set of in sync statistics.
+ * @st: scsi_tape structure.
+ * @buf: buffer to containing sync value
+ * @len length of data
+ **/
+static ssize_t st_stats_attr_store_sync(struct scsi_tape *st, const char *buf,
+	size_t len)
+{
+	if (kstrtoull(buf, 0, &st->stats->sync) != 0)
+		st->stats->sync = 0;
+	if (st->stats->sync == 0)
+		return len;
+	st->stats->sync_read_byte_cnt = st->stats->read_byte_cnt;
+	st->stats->sync_write_byte_cnt = st->stats->write_byte_cnt;
+	st->stats->sync_in_flight = st->stats->in_flight;
+	st->stats->sync_read_cnt = st->stats->read_cnt;
+	st->stats->sync_write_cnt = st->stats->write_cnt;
+	st->stats->sync_other_cnt = st->stats->other_cnt;
+	st->stats->sync_read_ticks = st->stats->read_ticks;
+	st->stats->sync_write_ticks = st->stats->write_ticks;
+	st->stats->sync_io_ticks = st->stats->io_ticks;
+	return len;
+}
+
+TAPE_STATS_ATTR(read_cnt, S_IRUSR | S_IRGRP | S_IROTH,
+	st_stats_attr_show_read_cnt, NULL);
+TAPE_STATS_ATTR(read_byte_cnt, S_IRUSR | S_IRGRP | S_IROTH,
+	st_stats_attr_show_read_byte_cnt, NULL);
+TAPE_STATS_ATTR(read_ms, S_IRUSR | S_IRGRP | S_IROTH,
+	st_stats_attr_show_read_ms, NULL);
+TAPE_STATS_ATTR(write_cnt, S_IRUSR | S_IRGRP | S_IROTH,
+	st_stats_attr_show_write_cnt, NULL);
+TAPE_STATS_ATTR(write_byte_cnt, S_IRUSR | S_IRGRP | S_IROTH,
+	st_stats_attr_show_write_byte_cnt, NULL);
+TAPE_STATS_ATTR(write_ms, S_IRUSR | S_IRGRP | S_IROTH,
+	st_stats_attr_show_write_ms, NULL);
+TAPE_STATS_ATTR(in_flight, S_IRUSR | S_IRGRP | S_IROTH,
+	st_stats_attr_show_in_flight, NULL);
+TAPE_STATS_ATTR(io_ms, S_IRUSR | S_IRGRP | S_IROTH,
+	st_stats_attr_show_io_ms, NULL);
+TAPE_STATS_ATTR(other_cnt, S_IRUSR | S_IRGRP | S_IROTH,
+	st_stats_attr_show_other_cnt, NULL);
+TAPE_STATS_ATTR(sync, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH,
+	st_stats_attr_show_sync, st_stats_attr_store_sync);
+#define to_tape_stats_attr(ptr) container_of(ptr, struct tape_stats_attr, attr)
+
+static const struct tape_stats_attr *tape_stats_attr_group[] = {
+	&tape_stats_attr_read_cnt,
+	&tape_stats_attr_read_byte_cnt,
+	&tape_stats_attr_read_ms,
+	&tape_stats_attr_write_cnt,
+	&tape_stats_attr_write_byte_cnt,
+	&tape_stats_attr_write_ms,
+	&tape_stats_attr_in_flight,
+	&tape_stats_attr_io_ms,
+	&tape_stats_attr_other_cnt,
+	&tape_stats_attr_sync,
+	NULL
+};
+
+/**
+ * st_stats_remove_file - remove sysfs atape stats attribute file.
+ * @st: scsi_tape structure.
+ * @attr: device attribute descriptor.
+ */
+static void st_stats_remove_file(struct scsi_tape *st,
+	const struct tape_stats_attr *attr)
+{
+	if (st && st->stats)
+		sysfs_remove_file(&st->stats->statistics, &attr->attr);
+}
+
+/**
+ * st_stats_create_file - create sysfs tape stats attribute file
+ * @st: scsi_tape structure.
+ * @attr: device attribute descriptor.
+ */
+static int st_stats_create_file(struct scsi_tape *st,
+	const struct tape_stats_attr *attr)
+{
+	int error = 0;
+
+	if (st && st->stats)
+		error = sysfs_create_file(&st->stats->statistics, &attr->attr);
+	return error;
+}
+
+/**
+ * st_stats_create_files - register files in the device
+ * statistics directory.
+ * @st: scsi_tape structure.
+ */
+static void st_stats_create_files(struct scsi_tape *st)
+{
+	int i = 0, rval = 0;
+
+	while (tape_stats_attr_group[i] != NULL) {
+		rval = st_stats_create_file(st, tape_stats_attr_group[i]);
+		if (rval != 0)
+			st_stats_remove_file(st, tape_stats_attr_group[i]);
+		i++;
+	}
+}
+
+/**
+ * st_stats_remove_files - remove the files from the statistics directory.
+ * @st: struct scsi_tape
+ */
+
+static void st_stats_remove_files(struct scsi_tape *st)
+{
+	int i = 0;
+
+	while (tape_stats_attr_group[i] != NULL) {
+		st_stats_remove_file(st, tape_stats_attr_group[i]);
+		i++;
+	}
+}
+
+/**
+ * st_tape_attr_store - call functions required to implement the store
+ * functionality. Works similar to show function.
+ * @kobj: struct kobject
+ * @attr: struct attribute
+ * @buf: data provided by caller
+ * @len: length of data provided
+ */
+static ssize_t st_tape_attr_store(struct kobject *kobj, struct attribute *attr,
+	const char *buf, size_t len)
+{
+	struct kobject *ktemp = kobj->parent;
+	struct device *dev;
+	struct scsi_tape *st;
+	struct tape_stats_attr *st_attr = to_tape_stats_attr(attr);
+	ssize_t ret = -EIO;
+	int i = 0;
+/* Make sure that we are being asked for an attribute we created */
+	while (tape_stats_attr_group[i] != NULL) {
+		if (tape_stats_attr_group[i] == st_attr)
+			break;
+		i++;
+	}
+	if (tape_stats_attr_group[i] == NULL)
+		return ret;
+
+	if ((!st_attr->store) || (ktemp == 0))
+		return ret;
+	dev = kobj_to_dev(ktemp);
+	if (dev == 0)
+		return ret;
+
+	mutex_lock(&st_ref_mutex);
+	st = dev_get_drvdata(dev);
+	if (st == 0) {
+		mutex_unlock(&st_ref_mutex);
+		return ret;
+	}
+	kref_get(&st->kref);
+	mutex_unlock(&st_ref_mutex);
+
+	if ((st->stats != NULL) && (st_attr->store))
+		ret = st_attr->store(st, buf, len);
+
+	mutex_lock(&st_ref_mutex);
+	kref_put(&st->kref, scsi_tape_release);
+	mutex_unlock(&st_ref_mutex);
+
+	return ret;
+}
+
+/**
+ * st_tape_attr_show - call the functions that provide the statistics.
+ * This function makes sure that the struct scsi_tape being referred to is
+ * current and has not been deleted (e.g. during an unload of the st
+ * driver or the tape drive disappearing).
+ * @kobj: struct kobject
+ * @attr: struct attribute
+ * @buf: data being returned to caller
+ */
+static ssize_t st_tape_attr_show(struct kobject *kobj, struct attribute *attr,
+	char *buf)
+{
+	struct kobject *ktemp = kobj->parent;
+	struct device *dev;
+	struct scsi_tape *st;
+	struct tape_stats_attr *st_attr = to_tape_stats_attr(attr);
+	ssize_t ret = -EIO;
+	int i = 0;
+/* Make sure that we are being asked for an attribute we created */
+	while (tape_stats_attr_group[i] != NULL) {
+		if (tape_stats_attr_group[i] == st_attr)
+			break;
+		i++;
+	}
+	if (tape_stats_attr_group[i] == NULL)
+		return ret;
+/* kobject passed in is for the statistics directory. We need to look at the
+   parent kobject (part of a struct device) and from there get the struct
+   scsi_tape. If no show function return error as well. */
+	if ((!st_attr->show) || (ktemp == 0) || (buf == 0))
+		return ret;
+	dev = kobj_to_dev(ktemp);
+	if (dev == 0)
+		return ret;
+/* dev_get_drvdata must return 0 if the struct scsi_tape has been freed.
+   Holding st_ref_mutex means it cannot be freed while we check and we
+   grab a reference to the struct scsi_tape before unlocking. */
+	mutex_lock(&st_ref_mutex);
+	st = dev_get_drvdata(dev);
+	if (st == 0) {
+		mutex_unlock(&st_ref_mutex);
+		return ret;
+	}
+	kref_get(&st->kref);
+	mutex_unlock(&st_ref_mutex);
+
+	if ((st->stats != NULL) && (st_attr->show))
+		ret = st_attr->show(st, buf);
+
+	mutex_lock(&st_ref_mutex);
+	kref_put(&st->kref, scsi_tape_release);
+	mutex_unlock(&st_ref_mutex);
+
+	return ret;
+}
+
+
+
 /* The following functions may be useful for a larger audience. */
 static int sgl_map_user_pages(struct st_buffer *STbp,
 			      const unsigned int max_pages, unsigned long uaddr,
diff -uprN a/drivers/scsi/st.h b/drivers/scsi/st.h
--- a/drivers/scsi/st.h	2015-01-11 14:46:00.243814755 -0600
+++ b/drivers/scsi/st.h	2015-01-11 18:04:54.714001944 -0600
@@ -92,6 +92,32 @@ struct st_partstat {
 	int drv_file;
 };
 
+/* Tape statistics */
+struct scsi_tape_stats {
+	struct kobject statistics; /* Object for statistics directory */
+	u64 read_byte_cnt;      /* bytes read */
+	u64 write_byte_cnt;     /* bytes written */
+	u64 in_flight;          /* Number of I/Os in flight */
+	u64 read_cnt;           /* Count of read requests */
+	u64 write_cnt;          /* Count of write requests */
+	u64 other_cnt;          /* Count of other requests either implicit
+					or from user space via ioctl. */
+	u64 read_ticks;         /* Ticks spent completing read requests */
+	u64 write_ticks;        /* Ticks spent completing write requests */
+	u64 io_ticks;           /* Ticks spent doing any I/O */
+	u64 stamp;              /* holds time request was queued */
+	u64 sync;               /* Are stats read in sync */
+	u64 sync_read_byte_cnt;
+	u64 sync_write_byte_cnt;
+	u64 sync_in_flight;
+	u64 sync_read_cnt;
+	u64 sync_write_cnt;
+	u64 sync_other_cnt;
+	u64 sync_read_ticks;
+	u64 sync_write_ticks;
+	u64 sync_io_ticks;
+};
+
 #define ST_NBR_PARTITIONS 4
 
 /* The tape drive descriptor */
@@ -171,6 +197,7 @@ struct scsi_tape {
 #endif
 	struct gendisk *disk;
 	struct kref     kref;
+	struct scsi_tape_stats *stats;
 };
 
 /* Bit masks for use_pf */
diff -uprN a/Documentation/scsi/st.txt b/Documentation/scsi/st.txt
--- a/Documentation/scsi/st.txt	2015-01-11 14:45:56.235815028 -0600
+++ b/Documentation/scsi/st.txt	2015-01-12 11:23:37.885398363 -0600
@@ -151,6 +151,98 @@ A link named 'tape' is made from the SCS
 directory corresponding to the mode 0 auto-rewind device (e.g., st0). 
 
 
+SYSFS AND STATISTICS FOR TAPE DEVICES
+
+The st driver maintains statistics for tape drives inside the sysfs filesystem.
+The following method can be used to locate the statistics directories that are
+available (assuming that sysfs is mounted at /sys):
+
+1. Use opendir(3) on the directory /sys/class/scsi_tape
+2. Use readdir(3) to read the directory contents
+3. Use regcomp(3)/regexec(3) to match directory entries to the extended
+        regular expression "^st[0-9]+$"
+4. Access the statistics from the /sys/class/scsi_tape/<match>/device/statistics
+        directory (where <match> is a directory entry from /sys/class/scsi_tape
+        that matched the extended regular expression)
+
+An example of a matching directory with the full path to the statistics
+directory would be:
+
+/sys/class/scsi_tape/st0/device/statistics
+
+This is not the real path to the statistics directory but it does represent a
+path which makes the directories easiest to find. The statistics directory is
+common to all tape devices associated with a SCSI tape drive the statistics
+will be the same for the st0, nst0, etc.
+
+The statistics directory contains the following files:
+
+1.  in_flight - The number of I/Os currently outstanding to this device.
+2.  io_ms - The amount of time spent waiting (in milliseconds) for all I/O
+        to complete (including read and write). This includes tape movement
+        commands such as seeking between file or set marks and implicit tape
+        movement such as when rewind on close tape devices are used.
+3.  other_cnt - The number of I/Os issued to the tape drive other than read or
+        write commands. The time taken to complete these commands is tracked
+        in io_ms.
+4.  read_byte_cnt - The number of bytes read from the tape drive.
+5.  read_cnt - The number of read requests issued to the tape drive.
+6.  read_ms - The amount of time (in milliseconds) spent waiting for read
+        requests to complete.
+7.  sync - Described below
+8.  write_byte_cnt - The number of bytes written to the tape drive.
+9.  write_cnt - The number of write requests issued to the tape drive.
+10. write_ms - The amount of time (in milliseconds) spent waiting for write
+        requests to complete.
+
+Note: The count statistics are incrememented at the start of an I/O but the
+time they take to complete is not added to the statistics until they complete.
+
+When read all of the statistics may be out of sync. That is they may not be
+on consistent set of statistics because of the amount of time taken to
+open/read/close each statistics file if the tape drive is in use.
+To alleviate that issue and try to make a set of consistent statistics
+the sync file was created.
+
+When a non-zero value is written into the sync file all of the statistics
+are saved. Until a new value is written into the sync file accesses to the
+statistics files will return the saved values instead of the current values
+(writing the same value as previously written causes the statistics to be
+saved again). Writing 0 makes the statistics files provides access to the
+current instead of the saved values.
+
+The following is the suggested way to access the statistics when you require
+a consistent set of statistics:
+
+1. Determine the location of the statistics directory
+2. Open the sync file and write a set value into the file (choose one value
+        that your application uses always - doing so will allow someone else
+        to determine who else is using the sync file)
+3. Open, read, then close the individual statistics files
+4. Read back the sync value and ensure you read back the value that you wrote.
+If the value read back is not the same write your chosen value back into the
+sync file and then re-read the statistics. If the sync value has changed again
+stop after 3 attempts and just use the values you gathered. There should not
+be that many users of the statistics that a collision is likely to happen.
+5. Always write 0 back into the sync file when you are done then close it.
+
+If you wish to manually view the statistics you can do so in the following
+manner:
+
+$ cd /sys/class/scsi_tape/st0/device/statistics
+$ grep [0-9] *
+in_flight:0
+io_ms:33263
+other_cnt:5
+read_byte_cnt:0
+read_cnt:0
+read_ms:0
+sync:0
+write_byte_cnt:104857600
+write_cnt:102400
+write_ms:24069
+
+
 BSD AND SYS V SEMANTICS
 
 The user can choose between these two behaviours of the tape driver by

^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: [PATCH] st: implement sysfs based tape statistics v2
  2015-01-13  3:43 [PATCH] st: implement sysfs based tape statistics v2 Seymour, Shane M
@ 2015-01-27  0:11 ` Seymour, Shane M
  2015-02-06 17:49   ` Jeremy Linton
  2015-02-02 15:16 ` Laurence Oberman
  2015-02-08 17:07 ` Dale R. Worley
  2 siblings, 1 reply; 17+ messages in thread
From: Seymour, Shane M @ 2015-01-27  0:11 UTC (permalink / raw)
  To: linux-scsi@vger.kernel.org
  Cc: Kai.Makisara@kolumbus.fi,
	James E.J. Bottomley (JBottomley@parallels.com), jeffm@suse.com

I was wondering if anyone had any feedback or had any chance to review the changes?

-----Original Message-----
From: linux-scsi-owner@vger.kernel.org [mailto:linux-scsi-owner@vger.kernel.org] On Behalf Of Seymour, Shane M
Sent: Tuesday, January 13, 2015 2:43 PM
To: linux-scsi@vger.kernel.org
Cc: Kai.Makisara@kolumbus.fi; James E.J. Bottomley (JBottomley@parallels.com); jeffm@suse.com
Subject: [PATCH] st: implement sysfs based tape statistics v2

Some small changes since the last version - this version removes two files from sysfs compared to the last version (read and write block counts since they're derived from the byte counts they can be calculated in user space) but that's the only change. This version has been rebased to 3.19.0-rc3-next-20150108.

Since posting the last version an email was received by Kai and myself from an AT&T employee who has a need for tape statistics to be implemented (who gave permission to quote their email), I've included part of the email here:

"There are over 20,000 tape devices managed by our operations group zoned to tens of thousands of servers.

My challenge is that I cannot provide operations a solution that gives them visibility into the tape drive performance metrics when that platform is linux. Our legacy platforms (Solaris,HPUX,AIX) provide facilities to use iostat and sar to determine the write speed of the tape drives. We took for granted that this would be available in linux and its absence has been very troublesome. Because operations cannot measure tape drive performance in this way they cannot easily determine when there is a tape drive performance problem and whether the change improved or worsened the performance problem.
...
I have followed the debate https://lkml.org/lkml/2013/3/20/696 and from a service provide perspective we would expect the same maturity and functionality that we have from the traditional unix platform in regards to iostat/sar. This issue is important and urgent because tape drive performance issues are common and I am unable to provide standards and processes to identify and remediate these issues."

Another HP customer has also requested the same functionality (but hasn't given permission to be named), they own tape drives numbering in the 1000s and also need the ability to investigate performance issues.

Signed-off-by: shane.seymour@hp.com
Tested-by: shane.seymour@hp.com
---
diff -uprN a/drivers/scsi/st.c b/drivers/scsi/st.c
--- a/drivers/scsi/st.c	2015-01-11 14:46:00.243814755 -0600
+++ b/drivers/scsi/st.c	2015-01-12 13:54:52.549117333 -0600
@@ -20,6 +20,7 @@
 static const char *verstr = "20101219";
 
 #include <linux/module.h>
+#include <linux/kobject.h>
 
 #include <linux/fs.h>
 #include <linux/kernel.h>
@@ -226,6 +227,20 @@ static DEFINE_SPINLOCK(st_index_lock);
 static DEFINE_SPINLOCK(st_use_lock);
 static DEFINE_IDR(st_index_idr);
 
+static inline void st_stats_remove_files(struct scsi_tape *);
+static inline void st_stats_create_files(struct scsi_tape *);
+static ssize_t st_tape_attr_show(struct kobject *, struct attribute *, char *);
+static ssize_t st_tape_attr_store(struct kobject *, struct attribute *,
+	const char *, size_t);
+static void st_release_stats_kobj(struct kobject *);
+static const struct sysfs_ops st_stats_sysfs_ops = {
+	.show   = st_tape_attr_show,
+	.store  = st_tape_attr_store,
+};
+static struct kobj_type st_stats_ktype = {
+	.release = st_release_stats_kobj,
+	.sysfs_ops = &st_stats_sysfs_ops,
+};
 
 

 #include "osst_detect.h"
@@ -476,10 +491,22 @@ static void st_scsi_execute_end(struct r
 	struct st_request *SRpnt = req->end_io_data;
 	struct scsi_tape *STp = SRpnt->stp;
 	struct bio *tmp;
+	u64 ticks;
 
 	STp->buffer->cmdstat.midlevel_result = SRpnt->result = req->errors;
 	STp->buffer->cmdstat.residual = req->resid_len;
 
+	if (STp->stats != NULL) {
+		ticks = get_jiffies_64();
+		STp->stats->in_flight--;
+		ticks -= STp->stats->stamp;
+		STp->stats->io_ticks += ticks;
+		if (req->cmd[0] == WRITE_6)
+			STp->stats->write_ticks += ticks;
+		else if (req->cmd[0] == READ_6)
+			STp->stats->read_ticks += ticks;
+	}
+
 	tmp = SRpnt->bio;
 	if (SRpnt->waiting)
 		complete(SRpnt->waiting);
@@ -496,6 +523,7 @@ static int st_scsi_execute(struct st_req
 	struct rq_map_data *mdata = &SRpnt->stp->buffer->map_data;
 	int err = 0;
 	int write = (data_direction == DMA_TO_DEVICE);
+	struct scsi_tape *STp = SRpnt->stp;
 
 	req = blk_get_request(SRpnt->stp->device->request_queue, write,
 			      GFP_KERNEL);
@@ -516,6 +544,20 @@ static int st_scsi_execute(struct st_req
 		}
 	}
 
+	if (STp->stats != NULL) {
+		if (cmd[0] == WRITE_6) {
+			STp->stats->write_cnt++;
+			STp->stats->write_byte_cnt += bufflen;
+		} else if (cmd[0] == READ_6) {
+			STp->stats->read_cnt++;
+			STp->stats->read_byte_cnt += bufflen;
+		} else {
+			STp->stats->other_cnt++;
+		}
+		STp->stats->stamp = get_jiffies_64();
+		STp->stats->in_flight++;
+	}
+
 	SRpnt->bio = req->bio;
 	req->cmd_len = COMMAND_SIZE(cmd[0]);
 	memset(req->cmd, 0, BLK_MAX_CDB);
@@ -4064,6 +4106,8 @@ out:
 static int create_cdevs(struct scsi_tape *tape)
 {
 	int mode, error;
+	struct scsi_tape_stats *tmp;
+
 	for (mode = 0; mode < ST_NBR_MODES; ++mode) {
 		error = create_one_cdev(tape, mode, 0);
 		if (error)
@@ -4072,6 +4116,26 @@ static int create_cdevs(struct scsi_tape
 		if (error)
 			return error;
 	}
+/* Create statistics directory under device, if it fails we dont
+   have statistics. */
+	if (tape->stats != NULL) {
+		kobject_init(&tape->stats->statistics, &st_stats_ktype);
+		error = kobject_add(&tape->stats->statistics,
+			&tape->device->sdev_gendev.kobj,
+			"statistics");
+		if (error) {
+			kobject_del(&tape->stats->statistics);
+			tmp = tape->stats;
+			tape->stats = NULL;
+			kfree(tmp);
+		} else {
+			st_stats_create_files(tape);
+		}
+	} else {
+		tmp = tape->stats;
+		tape->stats = NULL;
+		kfree(tmp);
+	}
 
 	return sysfs_create_link(&tape->device->sdev_gendev.kobj,
 				 &tape->modes[0].devs[0]->kobj, "tape");
@@ -4081,6 +4145,10 @@ static void remove_cdevs(struct scsi_tap
 {
 	int mode, rew;
 	sysfs_remove_link(&tape->device->sdev_gendev.kobj, "tape");
+	if (tape->stats != NULL) {
+		st_stats_remove_files(tape);
+		kobject_del(&tape->stats->statistics);
+	}
 	for (mode = 0; mode < ST_NBR_MODES; mode++) {
 		struct st_modedef *STm = &(tape->modes[mode]);
 		for (rew = 0; rew < 2; rew++) {
@@ -4222,6 +4290,8 @@ static int st_probe(struct device *dev)
 	}
 	tpnt->index = error;
 	sprintf(disk->disk_name, "st%d", tpnt->index);
+/* Allocate stats structure */
+	tpnt->stats = kzalloc(sizeof(struct scsi_tape_stats), GFP_ATOMIC);
 
 	dev_set_drvdata(dev, tpnt);
 
@@ -4241,6 +4311,7 @@ static int st_probe(struct device *dev)
 
 out_remove_devs:
 	remove_cdevs(tpnt);
+	dev_set_drvdata(dev, NULL);
 	spin_lock(&st_index_lock);
 	idr_remove(&st_index_idr, tpnt->index);
 	spin_unlock(&st_index_lock);
@@ -4248,6 +4319,8 @@ out_put_queue:
 	blk_put_queue(disk->queue);
 out_put_disk:
 	put_disk(disk);
+	if (tpnt->stats != NULL)
+		kfree(tpnt->stats);
 	kfree(tpnt);
 out_buffer_free:
 	kfree(buffer);
@@ -4288,6 +4361,7 @@ static void scsi_tape_release(struct kre
 	struct scsi_tape *tpnt = to_scsi_tape(kref);
 	struct gendisk *disk = tpnt->disk;
 
+	dev_set_drvdata(&tpnt->device->sdev_gendev, NULL);
 	tpnt->device = NULL;
 
 	if (tpnt->buffer) {
@@ -4298,6 +4372,10 @@ static void scsi_tape_release(struct kre
 
 	disk->private_data = NULL;
 	put_disk(disk);
+	if (tpnt->stats != NULL) {
+		kfree(tpnt->stats);
+		tpnt->stats = NULL;
+	}
 	kfree(tpnt);
 	return;
 }
@@ -4523,6 +4601,412 @@ static struct attribute *st_dev_attrs[]
 };
 ATTRIBUTE_GROUPS(st_dev);
 
+/* Support for tape stats */
+
+struct tape_stats_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct scsi_tape *, char *);
+	ssize_t (*store)(struct scsi_tape *, const char *, size_t);
+};
+
+#define TAPE_STATS_ATTR(_name, _mode, _show, _store)		\
+const struct tape_stats_attr tape_stats_attr_##_name = {	\
+	.attr = {.name  = __stringify(_name), .mode   = _mode },\
+	.show   = _show,					\
+	.store  = _store,					\
+}
+
+void st_release_stats_kobj(struct kobject *dummy)
+{
+}
+
+/**
+ * st_stats_attr_show_read_cnt - return read count - count of reads made
+ * from tape drive
+ * @st: scsi_tape structure.
+ * @buf: buffer to return formatted data in
+ */
+static ssize_t st_stats_attr_show_read_cnt(struct scsi_tape *st, char *buf)
+{
+	if (st->stats->sync == 0)
+		return snprintf(buf, 4096, "%llu", st->stats->read_cnt);
+	return snprintf(buf, 4096, "%llu", st->stats->sync_read_cnt);
+}
+
+/**
+ * st_stats_attr_show_read_byte_cnt - return read byte count - tape drives
+ * may use blocks less than 512 bytes this gives the raw byte count of
+ * of data read from the tape drive.
+ * @st: scsi_tape structure.
+ * @buf: buffer to return formatted data in
+ */
+static ssize_t st_stats_attr_show_read_byte_cnt(struct scsi_tape *st, char *buf)
+{
+	if (st->stats->sync == 0)
+		return snprintf(buf, 4096, "%llu", st->stats->read_byte_cnt);
+	return snprintf(buf, 4096, "%llu", st->stats->sync_read_byte_cnt);
+}
+
+/**
+ * st_stats_attr_show_read_ms - return read ms - overall time spent waiting
+ * on reads in ms.
+ * @st: scsi_tape structure.
+ * @buf: buffer to return formatted data in
+ */
+static ssize_t st_stats_attr_show_read_ms(struct scsi_tape *st, char *buf)
+{
+	if (st->stats->sync == 0)
+		return snprintf(buf, 4096, "%u",
+			jiffies_to_msecs(st->stats->read_ticks));
+	return snprintf(buf, 4096, "%u",
+		jiffies_to_msecs(st->stats->sync_read_ticks));
+}
+
+/**
+ * st_stats_attr_show_write_cnt - write count - number of user calls
+ * to write(2) that have written data to tape.
+ * @st: scsi_tape structure.
+ * @buf: buffer to return formatted data in
+ */
+static ssize_t st_stats_attr_show_write_cnt(struct scsi_tape *st, char *buf)
+{
+	if (st->stats->sync == 0)
+		return snprintf(buf, 4096, "%llu", st->stats->write_cnt);
+	return snprintf(buf, 4096, "%llu", st->stats->sync_write_cnt);
+}
+
+/**
+ * st_stats_attr_show_write_byte_cnt - write byte count - raw count of
+ * bytes written to tape.
+ * @st: scsi_tape structure.
+ * @buf: buffer to return formatted data in
+ */
+static ssize_t st_stats_attr_show_write_byte_cnt(struct scsi_tape *st,
+	char *buf)
+{
+	if (st->stats->sync == 0)
+		return snprintf(buf, 4096, "%llu", st->stats->write_byte_cnt);
+	return snprintf(buf, 4096, "%llu", st->stats->sync_write_byte_cnt);
+}
+
+/**
+ * st_stats_attr_show_write_ms - write ms - number of milliseconds since
+ * last open waiting on write requests to complete.
+ * @st: scsi_tape structure.
+ * @buf: buffer to return formatted data in
+ */
+static ssize_t st_stats_attr_show_write_ms(struct scsi_tape *st, char *buf)
+{
+	if (st->stats->sync == 0)
+		return snprintf(buf, 4096, "%u",
+			jiffies_to_msecs(st->stats->write_ticks));
+	return snprintf(buf, 4096, "%u",
+		jiffies_to_msecs(st->stats->sync_write_ticks));
+}
+
+/**
+ * st_stats_attr_show_in_flight - number of I/Os currently in flight -
+ * in most cases this will be either 0 or 1. It may be higher if someone
+ * has also issued other SCSI commands such as via an ioctl.
+ * @st: scsi_tape structure.
+ * @buf: buffer to return formatted data in
+ */
+static ssize_t st_stats_attr_show_in_flight(struct scsi_tape *st, char *buf)
+{
+	if (st->stats->sync == 0)
+		return snprintf(buf, 4096, "%llu", st->stats->in_flight);
+	return snprintf(buf, 4096, "%llu", st->stats->sync_in_flight);
+}
+
+/**
+ * st_stats_attr_show_io_ms - io wait ms - this is the number of ms spent
+ * waiting on other I/O to complete. This includes tape movement commands
+ * such as rewinding, seeking to end of file or tape, etc. Except in
+ * complex tape management programs these will be indirect commands issued
+ * by the driver - e.g. rewind on close.
+ * @st: scsi_tape structure.
+ * @buf: buffer to return formatted data in
+ */
+static ssize_t st_stats_attr_show_io_ms(struct scsi_tape *st, char *buf)
+{
+	if (st->stats->sync == 0)
+		return snprintf(buf, 4096, "%u",
+			jiffies_to_msecs(st->stats->io_ticks));
+	return snprintf(buf, 4096, "%u",
+		jiffies_to_msecs(st->stats->sync_io_ticks));
+}
+
+/**
+ * st_stats_attr_show_other_cnt - other io count - this is the number of
+ * I/O requests that make up the time returned from st_stats_attr_show_io_ms.
+ * Typically these are tape movement requests but will include driver
+ * tape movement. This includes on requests seen by the st driver.
+ * @st: scsi_tape structure.
+ * @buf: buffer to return formatted data in
+ */
+static ssize_t st_stats_attr_show_other_cnt(struct scsi_tape *st, char *buf)
+{
+	if (st->stats->sync == 0)
+		return snprintf(buf, 4096, "%llu", st->stats->other_cnt);
+	return snprintf(buf, 4096, "%llu", st->stats->sync_other_cnt);
+}
+
+/**
+ * st_stats_attr_show_sync - if 0 is returned the stats are being read
+ * process holding the stats in sync. The value of the sync file should not
+ * be non-zero for long. You should be making sure that it is left non-zero
+ * for the shortest possible time.
+ * If you do not care about them being in sync you need to take some
+ * action if the sync value is non-zero as someone could have left it
+ * non-zero which means the values never change. You should write a "0"
+ * to the sync file before you read the other stats files if you have
+ * permission to so.
+ * @st: scsi_tape structure.
+ * @buf: buffer to return formatted data in
+ */
+static ssize_t st_stats_attr_show_sync(struct scsi_tape *st, char *buf)
+{
+	return snprintf(buf, 4096, "%llu", st->stats->sync);
+}
+
+/**
+ * st_stats_attr_store_sync - store the sync value this causes the current
+ * stats to be saved to their save variables and values are then read from
+ * them to get a consistent set of values. You must read back the sync value
+ * after you have read all the values required and check if it has changed
+ * and decide if you will resync and reread the values if it has been changed.
+ * Any program writing a sync value must set it back to 0 once they are done.
+ * You should write your current pid into the sync so you can determine if
+ * someone else is changing the sync value. There is still a chance for a race
+ * but the sync file is the only method I could think of when writing this to
+ * get a best approximation of a set of in sync statistics.
+ * @st: scsi_tape structure.
+ * @buf: buffer to containing sync value
+ * @len length of data
+ **/
+static ssize_t st_stats_attr_store_sync(struct scsi_tape *st, const char *buf,
+	size_t len)
+{
+	if (kstrtoull(buf, 0, &st->stats->sync) != 0)
+		st->stats->sync = 0;
+	if (st->stats->sync == 0)
+		return len;
+	st->stats->sync_read_byte_cnt = st->stats->read_byte_cnt;
+	st->stats->sync_write_byte_cnt = st->stats->write_byte_cnt;
+	st->stats->sync_in_flight = st->stats->in_flight;
+	st->stats->sync_read_cnt = st->stats->read_cnt;
+	st->stats->sync_write_cnt = st->stats->write_cnt;
+	st->stats->sync_other_cnt = st->stats->other_cnt;
+	st->stats->sync_read_ticks = st->stats->read_ticks;
+	st->stats->sync_write_ticks = st->stats->write_ticks;
+	st->stats->sync_io_ticks = st->stats->io_ticks;
+	return len;
+}
+
+TAPE_STATS_ATTR(read_cnt, S_IRUSR | S_IRGRP | S_IROTH,
+	st_stats_attr_show_read_cnt, NULL);
+TAPE_STATS_ATTR(read_byte_cnt, S_IRUSR | S_IRGRP | S_IROTH,
+	st_stats_attr_show_read_byte_cnt, NULL);
+TAPE_STATS_ATTR(read_ms, S_IRUSR | S_IRGRP | S_IROTH,
+	st_stats_attr_show_read_ms, NULL);
+TAPE_STATS_ATTR(write_cnt, S_IRUSR | S_IRGRP | S_IROTH,
+	st_stats_attr_show_write_cnt, NULL);
+TAPE_STATS_ATTR(write_byte_cnt, S_IRUSR | S_IRGRP | S_IROTH,
+	st_stats_attr_show_write_byte_cnt, NULL);
+TAPE_STATS_ATTR(write_ms, S_IRUSR | S_IRGRP | S_IROTH,
+	st_stats_attr_show_write_ms, NULL);
+TAPE_STATS_ATTR(in_flight, S_IRUSR | S_IRGRP | S_IROTH,
+	st_stats_attr_show_in_flight, NULL);
+TAPE_STATS_ATTR(io_ms, S_IRUSR | S_IRGRP | S_IROTH,
+	st_stats_attr_show_io_ms, NULL);
+TAPE_STATS_ATTR(other_cnt, S_IRUSR | S_IRGRP | S_IROTH,
+	st_stats_attr_show_other_cnt, NULL);
+TAPE_STATS_ATTR(sync, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH,
+	st_stats_attr_show_sync, st_stats_attr_store_sync);
+#define to_tape_stats_attr(ptr) container_of(ptr, struct tape_stats_attr, attr)
+
+static const struct tape_stats_attr *tape_stats_attr_group[] = {
+	&tape_stats_attr_read_cnt,
+	&tape_stats_attr_read_byte_cnt,
+	&tape_stats_attr_read_ms,
+	&tape_stats_attr_write_cnt,
+	&tape_stats_attr_write_byte_cnt,
+	&tape_stats_attr_write_ms,
+	&tape_stats_attr_in_flight,
+	&tape_stats_attr_io_ms,
+	&tape_stats_attr_other_cnt,
+	&tape_stats_attr_sync,
+	NULL
+};
+
+/**
+ * st_stats_remove_file - remove sysfs atape stats attribute file.
+ * @st: scsi_tape structure.
+ * @attr: device attribute descriptor.
+ */
+static void st_stats_remove_file(struct scsi_tape *st,
+	const struct tape_stats_attr *attr)
+{
+	if (st && st->stats)
+		sysfs_remove_file(&st->stats->statistics, &attr->attr);
+}
+
+/**
+ * st_stats_create_file - create sysfs tape stats attribute file
+ * @st: scsi_tape structure.
+ * @attr: device attribute descriptor.
+ */
+static int st_stats_create_file(struct scsi_tape *st,
+	const struct tape_stats_attr *attr)
+{
+	int error = 0;
+
+	if (st && st->stats)
+		error = sysfs_create_file(&st->stats->statistics, &attr->attr);
+	return error;
+}
+
+/**
+ * st_stats_create_files - register files in the device
+ * statistics directory.
+ * @st: scsi_tape structure.
+ */
+static void st_stats_create_files(struct scsi_tape *st)
+{
+	int i = 0, rval = 0;
+
+	while (tape_stats_attr_group[i] != NULL) {
+		rval = st_stats_create_file(st, tape_stats_attr_group[i]);
+		if (rval != 0)
+			st_stats_remove_file(st, tape_stats_attr_group[i]);
+		i++;
+	}
+}
+
+/**
+ * st_stats_remove_files - remove the files from the statistics directory.
+ * @st: struct scsi_tape
+ */
+
+static void st_stats_remove_files(struct scsi_tape *st)
+{
+	int i = 0;
+
+	while (tape_stats_attr_group[i] != NULL) {
+		st_stats_remove_file(st, tape_stats_attr_group[i]);
+		i++;
+	}
+}
+
+/**
+ * st_tape_attr_store - call functions required to implement the store
+ * functionality. Works similar to show function.
+ * @kobj: struct kobject
+ * @attr: struct attribute
+ * @buf: data provided by caller
+ * @len: length of data provided
+ */
+static ssize_t st_tape_attr_store(struct kobject *kobj, struct attribute *attr,
+	const char *buf, size_t len)
+{
+	struct kobject *ktemp = kobj->parent;
+	struct device *dev;
+	struct scsi_tape *st;
+	struct tape_stats_attr *st_attr = to_tape_stats_attr(attr);
+	ssize_t ret = -EIO;
+	int i = 0;
+/* Make sure that we are being asked for an attribute we created */
+	while (tape_stats_attr_group[i] != NULL) {
+		if (tape_stats_attr_group[i] == st_attr)
+			break;
+		i++;
+	}
+	if (tape_stats_attr_group[i] == NULL)
+		return ret;
+
+	if ((!st_attr->store) || (ktemp == 0))
+		return ret;
+	dev = kobj_to_dev(ktemp);
+	if (dev == 0)
+		return ret;
+
+	mutex_lock(&st_ref_mutex);
+	st = dev_get_drvdata(dev);
+	if (st == 0) {
+		mutex_unlock(&st_ref_mutex);
+		return ret;
+	}
+	kref_get(&st->kref);
+	mutex_unlock(&st_ref_mutex);
+
+	if ((st->stats != NULL) && (st_attr->store))
+		ret = st_attr->store(st, buf, len);
+
+	mutex_lock(&st_ref_mutex);
+	kref_put(&st->kref, scsi_tape_release);
+	mutex_unlock(&st_ref_mutex);
+
+	return ret;
+}
+
+/**
+ * st_tape_attr_show - call the functions that provide the statistics.
+ * This function makes sure that the struct scsi_tape being referred to is
+ * current and has not been deleted (e.g. during an unload of the st
+ * driver or the tape drive disappearing).
+ * @kobj: struct kobject
+ * @attr: struct attribute
+ * @buf: data being returned to caller
+ */
+static ssize_t st_tape_attr_show(struct kobject *kobj, struct attribute *attr,
+	char *buf)
+{
+	struct kobject *ktemp = kobj->parent;
+	struct device *dev;
+	struct scsi_tape *st;
+	struct tape_stats_attr *st_attr = to_tape_stats_attr(attr);
+	ssize_t ret = -EIO;
+	int i = 0;
+/* Make sure that we are being asked for an attribute we created */
+	while (tape_stats_attr_group[i] != NULL) {
+		if (tape_stats_attr_group[i] == st_attr)
+			break;
+		i++;
+	}
+	if (tape_stats_attr_group[i] == NULL)
+		return ret;
+/* kobject passed in is for the statistics directory. We need to look at the
+   parent kobject (part of a struct device) and from there get the struct
+   scsi_tape. If no show function return error as well. */
+	if ((!st_attr->show) || (ktemp == 0) || (buf == 0))
+		return ret;
+	dev = kobj_to_dev(ktemp);
+	if (dev == 0)
+		return ret;
+/* dev_get_drvdata must return 0 if the struct scsi_tape has been freed.
+   Holding st_ref_mutex means it cannot be freed while we check and we
+   grab a reference to the struct scsi_tape before unlocking. */
+	mutex_lock(&st_ref_mutex);
+	st = dev_get_drvdata(dev);
+	if (st == 0) {
+		mutex_unlock(&st_ref_mutex);
+		return ret;
+	}
+	kref_get(&st->kref);
+	mutex_unlock(&st_ref_mutex);
+
+	if ((st->stats != NULL) && (st_attr->show))
+		ret = st_attr->show(st, buf);
+
+	mutex_lock(&st_ref_mutex);
+	kref_put(&st->kref, scsi_tape_release);
+	mutex_unlock(&st_ref_mutex);
+
+	return ret;
+}
+
+
+
 /* The following functions may be useful for a larger audience. */
 static int sgl_map_user_pages(struct st_buffer *STbp,
 			      const unsigned int max_pages, unsigned long uaddr,
diff -uprN a/drivers/scsi/st.h b/drivers/scsi/st.h
--- a/drivers/scsi/st.h	2015-01-11 14:46:00.243814755 -0600
+++ b/drivers/scsi/st.h	2015-01-11 18:04:54.714001944 -0600
@@ -92,6 +92,32 @@ struct st_partstat {
 	int drv_file;
 };
 
+/* Tape statistics */
+struct scsi_tape_stats {
+	struct kobject statistics; /* Object for statistics directory */
+	u64 read_byte_cnt;      /* bytes read */
+	u64 write_byte_cnt;     /* bytes written */
+	u64 in_flight;          /* Number of I/Os in flight */
+	u64 read_cnt;           /* Count of read requests */
+	u64 write_cnt;          /* Count of write requests */
+	u64 other_cnt;          /* Count of other requests either implicit
+					or from user space via ioctl. */
+	u64 read_ticks;         /* Ticks spent completing read requests */
+	u64 write_ticks;        /* Ticks spent completing write requests */
+	u64 io_ticks;           /* Ticks spent doing any I/O */
+	u64 stamp;              /* holds time request was queued */
+	u64 sync;               /* Are stats read in sync */
+	u64 sync_read_byte_cnt;
+	u64 sync_write_byte_cnt;
+	u64 sync_in_flight;
+	u64 sync_read_cnt;
+	u64 sync_write_cnt;
+	u64 sync_other_cnt;
+	u64 sync_read_ticks;
+	u64 sync_write_ticks;
+	u64 sync_io_ticks;
+};
+
 #define ST_NBR_PARTITIONS 4
 
 /* The tape drive descriptor */
@@ -171,6 +197,7 @@ struct scsi_tape {
 #endif
 	struct gendisk *disk;
 	struct kref     kref;
+	struct scsi_tape_stats *stats;
 };
 
 /* Bit masks for use_pf */
diff -uprN a/Documentation/scsi/st.txt b/Documentation/scsi/st.txt
--- a/Documentation/scsi/st.txt	2015-01-11 14:45:56.235815028 -0600
+++ b/Documentation/scsi/st.txt	2015-01-12 11:23:37.885398363 -0600
@@ -151,6 +151,98 @@ A link named 'tape' is made from the SCS
 directory corresponding to the mode 0 auto-rewind device (e.g., st0). 
 
 
+SYSFS AND STATISTICS FOR TAPE DEVICES
+
+The st driver maintains statistics for tape drives inside the sysfs filesystem.
+The following method can be used to locate the statistics directories that are
+available (assuming that sysfs is mounted at /sys):
+
+1. Use opendir(3) on the directory /sys/class/scsi_tape
+2. Use readdir(3) to read the directory contents
+3. Use regcomp(3)/regexec(3) to match directory entries to the extended
+        regular expression "^st[0-9]+$"
+4. Access the statistics from the /sys/class/scsi_tape/<match>/device/statistics
+        directory (where <match> is a directory entry from /sys/class/scsi_tape
+        that matched the extended regular expression)
+
+An example of a matching directory with the full path to the statistics
+directory would be:
+
+/sys/class/scsi_tape/st0/device/statistics
+
+This is not the real path to the statistics directory but it does represent a
+path which makes the directories easiest to find. The statistics directory is
+common to all tape devices associated with a SCSI tape drive the statistics
+will be the same for the st0, nst0, etc.
+
+The statistics directory contains the following files:
+
+1.  in_flight - The number of I/Os currently outstanding to this device.
+2.  io_ms - The amount of time spent waiting (in milliseconds) for all I/O
+        to complete (including read and write). This includes tape movement
+        commands such as seeking between file or set marks and implicit tape
+        movement such as when rewind on close tape devices are used.
+3.  other_cnt - The number of I/Os issued to the tape drive other than read or
+        write commands. The time taken to complete these commands is tracked
+        in io_ms.
+4.  read_byte_cnt - The number of bytes read from the tape drive.
+5.  read_cnt - The number of read requests issued to the tape drive.
+6.  read_ms - The amount of time (in milliseconds) spent waiting for read
+        requests to complete.
+7.  sync - Described below
+8.  write_byte_cnt - The number of bytes written to the tape drive.
+9.  write_cnt - The number of write requests issued to the tape drive.
+10. write_ms - The amount of time (in milliseconds) spent waiting for write
+        requests to complete.
+
+Note: The count statistics are incrememented at the start of an I/O but the
+time they take to complete is not added to the statistics until they complete.
+
+When read all of the statistics may be out of sync. That is they may not be
+on consistent set of statistics because of the amount of time taken to
+open/read/close each statistics file if the tape drive is in use.
+To alleviate that issue and try to make a set of consistent statistics
+the sync file was created.
+
+When a non-zero value is written into the sync file all of the statistics
+are saved. Until a new value is written into the sync file accesses to the
+statistics files will return the saved values instead of the current values
+(writing the same value as previously written causes the statistics to be
+saved again). Writing 0 makes the statistics files provides access to the
+current instead of the saved values.
+
+The following is the suggested way to access the statistics when you require
+a consistent set of statistics:
+
+1. Determine the location of the statistics directory
+2. Open the sync file and write a set value into the file (choose one value
+        that your application uses always - doing so will allow someone else
+        to determine who else is using the sync file)
+3. Open, read, then close the individual statistics files
+4. Read back the sync value and ensure you read back the value that you wrote.
+If the value read back is not the same write your chosen value back into the
+sync file and then re-read the statistics. If the sync value has changed again
+stop after 3 attempts and just use the values you gathered. There should not
+be that many users of the statistics that a collision is likely to happen.
+5. Always write 0 back into the sync file when you are done then close it.
+
+If you wish to manually view the statistics you can do so in the following
+manner:
+
+$ cd /sys/class/scsi_tape/st0/device/statistics
+$ grep [0-9] *
+in_flight:0
+io_ms:33263
+other_cnt:5
+read_byte_cnt:0
+read_cnt:0
+read_ms:0
+sync:0
+write_byte_cnt:104857600
+write_cnt:102400
+write_ms:24069
+
+
 BSD AND SYS V SEMANTICS
 
 The user can choose between these two behaviours of the tape driver by
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] st: implement sysfs based tape statistics v2
  2015-01-13  3:43 [PATCH] st: implement sysfs based tape statistics v2 Seymour, Shane M
  2015-01-27  0:11 ` Seymour, Shane M
@ 2015-02-02 15:16 ` Laurence Oberman
  2015-02-05 17:03   ` "Kai Mäkisara (Kolumbus)"
  2015-02-08 17:07 ` Dale R. Worley
  2 siblings, 1 reply; 17+ messages in thread
From: Laurence Oberman @ 2015-02-02 15:16 UTC (permalink / raw)
  To: Seymour, Shane M, loberman@redhat.com
  Cc: linux-scsi@vger.kernel.org, Kai.Makisara@kolumbus.fi,
	James E.J. Bottomley (JBottomley@parallels.com), jeffm@suse.com

I pulled this this morning and will be testing. The prior version was
stable for me on the upstream and RHEL 6.5 kernel without exhaustive
testing.
We also just received more requests to get this into RHEL from HP /
Red Hat customers.

Kai, what are your thoughts. I realize this is a large amount of
additional code. I am not keen to create a driver just for stats as we
would have to keep the rest of the st driver changes always in sync.

Thanks
Laurence

On Mon, Jan 12, 2015 at 10:43 PM, Seymour, Shane M <shane.seymour@hp.com> wrote:
> Some small changes since the last version - this version removes two files from sysfs compared to the last version (read and write block counts since they're derived from the byte counts they can be calculated in user space) but that's the only change. This version has been rebased to 3.19.0-rc3-next-20150108.
>
> Since posting the last version an email was received by Kai and myself from an AT&T employee who has a need for tape statistics to be implemented (who gave permission to quote their email), I've included part of the email here:
>
> "There are over 20,000 tape devices managed by our operations group zoned to tens of thousands of servers.
>
> My challenge is that I cannot provide operations a solution that gives them visibility into the tape drive performance metrics when that platform is linux. Our legacy platforms (Solaris,HPUX,AIX) provide facilities to use iostat and sar to determine the write speed of the tape drives. We took for granted that this would be available in linux and its absence has been very troublesome. Because operations cannot measure tape drive performance in this way they cannot easily determine when there is a tape drive performance problem and whether the change improved or worsened the performance problem.
> ...
> I have followed the debate https://lkml.org/lkml/2013/3/20/696 and from a service provide perspective we would expect the same maturity and functionality that we have from the traditional unix platform in regards to iostat/sar. This issue is important and urgent because tape drive performance issues are common and I am unable to provide standards and processes to identify and remediate these issues."
>
> Another HP customer has also requested the same functionality (but hasn't given permission to be named), they own tape drives numbering in the 1000s and also need the ability to investigate performance issues.
>
> Signed-off-by: shane.seymour@hp.com
> Tested-by: shane.seymour@hp.com
> ---
> diff -uprN a/drivers/scsi/st.c b/drivers/scsi/st.c
> --- a/drivers/scsi/st.c 2015-01-11 14:46:00.243814755 -0600
> +++ b/drivers/scsi/st.c 2015-01-12 13:54:52.549117333 -0600
> @@ -20,6 +20,7 @@
>  static const char *verstr = "20101219";
>
>  #include <linux/module.h>
> +#include <linux/kobject.h>
>
>  #include <linux/fs.h>
>  #include <linux/kernel.h>
> @@ -226,6 +227,20 @@ static DEFINE_SPINLOCK(st_index_lock);
>  static DEFINE_SPINLOCK(st_use_lock);
>  static DEFINE_IDR(st_index_idr);
>
> +static inline void st_stats_remove_files(struct scsi_tape *);
> +static inline void st_stats_create_files(struct scsi_tape *);
> +static ssize_t st_tape_attr_show(struct kobject *, struct attribute *, char *);
> +static ssize_t st_tape_attr_store(struct kobject *, struct attribute *,
> +       const char *, size_t);
> +static void st_release_stats_kobj(struct kobject *);
> +static const struct sysfs_ops st_stats_sysfs_ops = {
> +       .show   = st_tape_attr_show,
> +       .store  = st_tape_attr_store,
> +};
> +static struct kobj_type st_stats_ktype = {
> +       .release = st_release_stats_kobj,
> +       .sysfs_ops = &st_stats_sysfs_ops,
> +};
>
>
>
>  #include "osst_detect.h"
> @@ -476,10 +491,22 @@ static void st_scsi_execute_end(struct r
>         struct st_request *SRpnt = req->end_io_data;
>         struct scsi_tape *STp = SRpnt->stp;
>         struct bio *tmp;
> +       u64 ticks;
>
>         STp->buffer->cmdstat.midlevel_result = SRpnt->result = req->errors;
>         STp->buffer->cmdstat.residual = req->resid_len;
>
> +       if (STp->stats != NULL) {
> +               ticks = get_jiffies_64();
> +               STp->stats->in_flight--;
> +               ticks -= STp->stats->stamp;
> +               STp->stats->io_ticks += ticks;
> +               if (req->cmd[0] == WRITE_6)
> +                       STp->stats->write_ticks += ticks;
> +               else if (req->cmd[0] == READ_6)
> +                       STp->stats->read_ticks += ticks;
> +       }
> +
>         tmp = SRpnt->bio;
>         if (SRpnt->waiting)
>                 complete(SRpnt->waiting);
> @@ -496,6 +523,7 @@ static int st_scsi_execute(struct st_req
>         struct rq_map_data *mdata = &SRpnt->stp->buffer->map_data;
>         int err = 0;
>         int write = (data_direction == DMA_TO_DEVICE);
> +       struct scsi_tape *STp = SRpnt->stp;
>
>         req = blk_get_request(SRpnt->stp->device->request_queue, write,
>                               GFP_KERNEL);
> @@ -516,6 +544,20 @@ static int st_scsi_execute(struct st_req
>                 }
>         }
>
> +       if (STp->stats != NULL) {
> +               if (cmd[0] == WRITE_6) {
> +                       STp->stats->write_cnt++;
> +                       STp->stats->write_byte_cnt += bufflen;
> +               } else if (cmd[0] == READ_6) {
> +                       STp->stats->read_cnt++;
> +                       STp->stats->read_byte_cnt += bufflen;
> +               } else {
> +                       STp->stats->other_cnt++;
> +               }
> +               STp->stats->stamp = get_jiffies_64();
> +               STp->stats->in_flight++;
> +       }
> +
>         SRpnt->bio = req->bio;
>         req->cmd_len = COMMAND_SIZE(cmd[0]);
>         memset(req->cmd, 0, BLK_MAX_CDB);
> @@ -4064,6 +4106,8 @@ out:
>  static int create_cdevs(struct scsi_tape *tape)
>  {
>         int mode, error;
> +       struct scsi_tape_stats *tmp;
> +
>         for (mode = 0; mode < ST_NBR_MODES; ++mode) {
>                 error = create_one_cdev(tape, mode, 0);
>                 if (error)
> @@ -4072,6 +4116,26 @@ static int create_cdevs(struct scsi_tape
>                 if (error)
>                         return error;
>         }
> +/* Create statistics directory under device, if it fails we dont
> +   have statistics. */
> +       if (tape->stats != NULL) {
> +               kobject_init(&tape->stats->statistics, &st_stats_ktype);
> +               error = kobject_add(&tape->stats->statistics,
> +                       &tape->device->sdev_gendev.kobj,
> +                       "statistics");
> +               if (error) {
> +                       kobject_del(&tape->stats->statistics);
> +                       tmp = tape->stats;
> +                       tape->stats = NULL;
> +                       kfree(tmp);
> +               } else {
> +                       st_stats_create_files(tape);
> +               }
> +       } else {
> +               tmp = tape->stats;
> +               tape->stats = NULL;
> +               kfree(tmp);
> +       }
>
>         return sysfs_create_link(&tape->device->sdev_gendev.kobj,
>                                  &tape->modes[0].devs[0]->kobj, "tape");
> @@ -4081,6 +4145,10 @@ static void remove_cdevs(struct scsi_tap
>  {
>         int mode, rew;
>         sysfs_remove_link(&tape->device->sdev_gendev.kobj, "tape");
> +       if (tape->stats != NULL) {
> +               st_stats_remove_files(tape);
> +               kobject_del(&tape->stats->statistics);
> +       }
>         for (mode = 0; mode < ST_NBR_MODES; mode++) {
>                 struct st_modedef *STm = &(tape->modes[mode]);
>                 for (rew = 0; rew < 2; rew++) {
> @@ -4222,6 +4290,8 @@ static int st_probe(struct device *dev)
>         }
>         tpnt->index = error;
>         sprintf(disk->disk_name, "st%d", tpnt->index);
> +/* Allocate stats structure */
> +       tpnt->stats = kzalloc(sizeof(struct scsi_tape_stats), GFP_ATOMIC);
>
>         dev_set_drvdata(dev, tpnt);
>
> @@ -4241,6 +4311,7 @@ static int st_probe(struct device *dev)
>
>  out_remove_devs:
>         remove_cdevs(tpnt);
> +       dev_set_drvdata(dev, NULL);
>         spin_lock(&st_index_lock);
>         idr_remove(&st_index_idr, tpnt->index);
>         spin_unlock(&st_index_lock);
> @@ -4248,6 +4319,8 @@ out_put_queue:
>         blk_put_queue(disk->queue);
>  out_put_disk:
>         put_disk(disk);
> +       if (tpnt->stats != NULL)
> +               kfree(tpnt->stats);
>         kfree(tpnt);
>  out_buffer_free:
>         kfree(buffer);
> @@ -4288,6 +4361,7 @@ static void scsi_tape_release(struct kre
>         struct scsi_tape *tpnt = to_scsi_tape(kref);
>         struct gendisk *disk = tpnt->disk;
>
> +       dev_set_drvdata(&tpnt->device->sdev_gendev, NULL);
>         tpnt->device = NULL;
>
>         if (tpnt->buffer) {
> @@ -4298,6 +4372,10 @@ static void scsi_tape_release(struct kre
>
>         disk->private_data = NULL;
>         put_disk(disk);
> +       if (tpnt->stats != NULL) {
> +               kfree(tpnt->stats);
> +               tpnt->stats = NULL;
> +       }
>         kfree(tpnt);
>         return;
>  }
> @@ -4523,6 +4601,412 @@ static struct attribute *st_dev_attrs[]
>  };
>  ATTRIBUTE_GROUPS(st_dev);
>
> +/* Support for tape stats */
> +
> +struct tape_stats_attr {
> +       struct attribute attr;
> +       ssize_t (*show)(struct scsi_tape *, char *);
> +       ssize_t (*store)(struct scsi_tape *, const char *, size_t);
> +};
> +
> +#define TAPE_STATS_ATTR(_name, _mode, _show, _store)           \
> +const struct tape_stats_attr tape_stats_attr_##_name = {       \
> +       .attr = {.name  = __stringify(_name), .mode   = _mode },\
> +       .show   = _show,                                        \
> +       .store  = _store,                                       \
> +}
> +
> +void st_release_stats_kobj(struct kobject *dummy)
> +{
> +}
> +
> +/**
> + * st_stats_attr_show_read_cnt - return read count - count of reads made
> + * from tape drive
> + * @st: scsi_tape structure.
> + * @buf: buffer to return formatted data in
> + */
> +static ssize_t st_stats_attr_show_read_cnt(struct scsi_tape *st, char *buf)
> +{
> +       if (st->stats->sync == 0)
> +               return snprintf(buf, 4096, "%llu", st->stats->read_cnt);
> +       return snprintf(buf, 4096, "%llu", st->stats->sync_read_cnt);
> +}
> +
> +/**
> + * st_stats_attr_show_read_byte_cnt - return read byte count - tape drives
> + * may use blocks less than 512 bytes this gives the raw byte count of
> + * of data read from the tape drive.
> + * @st: scsi_tape structure.
> + * @buf: buffer to return formatted data in
> + */
> +static ssize_t st_stats_attr_show_read_byte_cnt(struct scsi_tape *st, char *buf)
> +{
> +       if (st->stats->sync == 0)
> +               return snprintf(buf, 4096, "%llu", st->stats->read_byte_cnt);
> +       return snprintf(buf, 4096, "%llu", st->stats->sync_read_byte_cnt);
> +}
> +
> +/**
> + * st_stats_attr_show_read_ms - return read ms - overall time spent waiting
> + * on reads in ms.
> + * @st: scsi_tape structure.
> + * @buf: buffer to return formatted data in
> + */
> +static ssize_t st_stats_attr_show_read_ms(struct scsi_tape *st, char *buf)
> +{
> +       if (st->stats->sync == 0)
> +               return snprintf(buf, 4096, "%u",
> +                       jiffies_to_msecs(st->stats->read_ticks));
> +       return snprintf(buf, 4096, "%u",
> +               jiffies_to_msecs(st->stats->sync_read_ticks));
> +}
> +
> +/**
> + * st_stats_attr_show_write_cnt - write count - number of user calls
> + * to write(2) that have written data to tape.
> + * @st: scsi_tape structure.
> + * @buf: buffer to return formatted data in
> + */
> +static ssize_t st_stats_attr_show_write_cnt(struct scsi_tape *st, char *buf)
> +{
> +       if (st->stats->sync == 0)
> +               return snprintf(buf, 4096, "%llu", st->stats->write_cnt);
> +       return snprintf(buf, 4096, "%llu", st->stats->sync_write_cnt);
> +}
> +
> +/**
> + * st_stats_attr_show_write_byte_cnt - write byte count - raw count of
> + * bytes written to tape.
> + * @st: scsi_tape structure.
> + * @buf: buffer to return formatted data in
> + */
> +static ssize_t st_stats_attr_show_write_byte_cnt(struct scsi_tape *st,
> +       char *buf)
> +{
> +       if (st->stats->sync == 0)
> +               return snprintf(buf, 4096, "%llu", st->stats->write_byte_cnt);
> +       return snprintf(buf, 4096, "%llu", st->stats->sync_write_byte_cnt);
> +}
> +
> +/**
> + * st_stats_attr_show_write_ms - write ms - number of milliseconds since
> + * last open waiting on write requests to complete.
> + * @st: scsi_tape structure.
> + * @buf: buffer to return formatted data in
> + */
> +static ssize_t st_stats_attr_show_write_ms(struct scsi_tape *st, char *buf)
> +{
> +       if (st->stats->sync == 0)
> +               return snprintf(buf, 4096, "%u",
> +                       jiffies_to_msecs(st->stats->write_ticks));
> +       return snprintf(buf, 4096, "%u",
> +               jiffies_to_msecs(st->stats->sync_write_ticks));
> +}
> +
> +/**
> + * st_stats_attr_show_in_flight - number of I/Os currently in flight -
> + * in most cases this will be either 0 or 1. It may be higher if someone
> + * has also issued other SCSI commands such as via an ioctl.
> + * @st: scsi_tape structure.
> + * @buf: buffer to return formatted data in
> + */
> +static ssize_t st_stats_attr_show_in_flight(struct scsi_tape *st, char *buf)
> +{
> +       if (st->stats->sync == 0)
> +               return snprintf(buf, 4096, "%llu", st->stats->in_flight);
> +       return snprintf(buf, 4096, "%llu", st->stats->sync_in_flight);
> +}
> +
> +/**
> + * st_stats_attr_show_io_ms - io wait ms - this is the number of ms spent
> + * waiting on other I/O to complete. This includes tape movement commands
> + * such as rewinding, seeking to end of file or tape, etc. Except in
> + * complex tape management programs these will be indirect commands issued
> + * by the driver - e.g. rewind on close.
> + * @st: scsi_tape structure.
> + * @buf: buffer to return formatted data in
> + */
> +static ssize_t st_stats_attr_show_io_ms(struct scsi_tape *st, char *buf)
> +{
> +       if (st->stats->sync == 0)
> +               return snprintf(buf, 4096, "%u",
> +                       jiffies_to_msecs(st->stats->io_ticks));
> +       return snprintf(buf, 4096, "%u",
> +               jiffies_to_msecs(st->stats->sync_io_ticks));
> +}
> +
> +/**
> + * st_stats_attr_show_other_cnt - other io count - this is the number of
> + * I/O requests that make up the time returned from st_stats_attr_show_io_ms.
> + * Typically these are tape movement requests but will include driver
> + * tape movement. This includes on requests seen by the st driver.
> + * @st: scsi_tape structure.
> + * @buf: buffer to return formatted data in
> + */
> +static ssize_t st_stats_attr_show_other_cnt(struct scsi_tape *st, char *buf)
> +{
> +       if (st->stats->sync == 0)
> +               return snprintf(buf, 4096, "%llu", st->stats->other_cnt);
> +       return snprintf(buf, 4096, "%llu", st->stats->sync_other_cnt);
> +}
> +
> +/**
> + * st_stats_attr_show_sync - if 0 is returned the stats are being read
> + * process holding the stats in sync. The value of the sync file should not
> + * be non-zero for long. You should be making sure that it is left non-zero
> + * for the shortest possible time.
> + * If you do not care about them being in sync you need to take some
> + * action if the sync value is non-zero as someone could have left it
> + * non-zero which means the values never change. You should write a "0"
> + * to the sync file before you read the other stats files if you have
> + * permission to so.
> + * @st: scsi_tape structure.
> + * @buf: buffer to return formatted data in
> + */
> +static ssize_t st_stats_attr_show_sync(struct scsi_tape *st, char *buf)
> +{
> +       return snprintf(buf, 4096, "%llu", st->stats->sync);
> +}
> +
> +/**
> + * st_stats_attr_store_sync - store the sync value this causes the current
> + * stats to be saved to their save variables and values are then read from
> + * them to get a consistent set of values. You must read back the sync value
> + * after you have read all the values required and check if it has changed
> + * and decide if you will resync and reread the values if it has been changed.
> + * Any program writing a sync value must set it back to 0 once they are done.
> + * You should write your current pid into the sync so you can determine if
> + * someone else is changing the sync value. There is still a chance for a race
> + * but the sync file is the only method I could think of when writing this to
> + * get a best approximation of a set of in sync statistics.
> + * @st: scsi_tape structure.
> + * @buf: buffer to containing sync value
> + * @len length of data
> + **/
> +static ssize_t st_stats_attr_store_sync(struct scsi_tape *st, const char *buf,
> +       size_t len)
> +{
> +       if (kstrtoull(buf, 0, &st->stats->sync) != 0)
> +               st->stats->sync = 0;
> +       if (st->stats->sync == 0)
> +               return len;
> +       st->stats->sync_read_byte_cnt = st->stats->read_byte_cnt;
> +       st->stats->sync_write_byte_cnt = st->stats->write_byte_cnt;
> +       st->stats->sync_in_flight = st->stats->in_flight;
> +       st->stats->sync_read_cnt = st->stats->read_cnt;
> +       st->stats->sync_write_cnt = st->stats->write_cnt;
> +       st->stats->sync_other_cnt = st->stats->other_cnt;
> +       st->stats->sync_read_ticks = st->stats->read_ticks;
> +       st->stats->sync_write_ticks = st->stats->write_ticks;
> +       st->stats->sync_io_ticks = st->stats->io_ticks;
> +       return len;
> +}
> +
> +TAPE_STATS_ATTR(read_cnt, S_IRUSR | S_IRGRP | S_IROTH,
> +       st_stats_attr_show_read_cnt, NULL);
> +TAPE_STATS_ATTR(read_byte_cnt, S_IRUSR | S_IRGRP | S_IROTH,
> +       st_stats_attr_show_read_byte_cnt, NULL);
> +TAPE_STATS_ATTR(read_ms, S_IRUSR | S_IRGRP | S_IROTH,
> +       st_stats_attr_show_read_ms, NULL);
> +TAPE_STATS_ATTR(write_cnt, S_IRUSR | S_IRGRP | S_IROTH,
> +       st_stats_attr_show_write_cnt, NULL);
> +TAPE_STATS_ATTR(write_byte_cnt, S_IRUSR | S_IRGRP | S_IROTH,
> +       st_stats_attr_show_write_byte_cnt, NULL);
> +TAPE_STATS_ATTR(write_ms, S_IRUSR | S_IRGRP | S_IROTH,
> +       st_stats_attr_show_write_ms, NULL);
> +TAPE_STATS_ATTR(in_flight, S_IRUSR | S_IRGRP | S_IROTH,
> +       st_stats_attr_show_in_flight, NULL);
> +TAPE_STATS_ATTR(io_ms, S_IRUSR | S_IRGRP | S_IROTH,
> +       st_stats_attr_show_io_ms, NULL);
> +TAPE_STATS_ATTR(other_cnt, S_IRUSR | S_IRGRP | S_IROTH,
> +       st_stats_attr_show_other_cnt, NULL);
> +TAPE_STATS_ATTR(sync, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH,
> +       st_stats_attr_show_sync, st_stats_attr_store_sync);
> +#define to_tape_stats_attr(ptr) container_of(ptr, struct tape_stats_attr, attr)
> +
> +static const struct tape_stats_attr *tape_stats_attr_group[] = {
> +       &tape_stats_attr_read_cnt,
> +       &tape_stats_attr_read_byte_cnt,
> +       &tape_stats_attr_read_ms,
> +       &tape_stats_attr_write_cnt,
> +       &tape_stats_attr_write_byte_cnt,
> +       &tape_stats_attr_write_ms,
> +       &tape_stats_attr_in_flight,
> +       &tape_stats_attr_io_ms,
> +       &tape_stats_attr_other_cnt,
> +       &tape_stats_attr_sync,
> +       NULL
> +};
> +
> +/**
> + * st_stats_remove_file - remove sysfs atape stats attribute file.
> + * @st: scsi_tape structure.
> + * @attr: device attribute descriptor.
> + */
> +static void st_stats_remove_file(struct scsi_tape *st,
> +       const struct tape_stats_attr *attr)
> +{
> +       if (st && st->stats)
> +               sysfs_remove_file(&st->stats->statistics, &attr->attr);
> +}
> +
> +/**
> + * st_stats_create_file - create sysfs tape stats attribute file
> + * @st: scsi_tape structure.
> + * @attr: device attribute descriptor.
> + */
> +static int st_stats_create_file(struct scsi_tape *st,
> +       const struct tape_stats_attr *attr)
> +{
> +       int error = 0;
> +
> +       if (st && st->stats)
> +               error = sysfs_create_file(&st->stats->statistics, &attr->attr);
> +       return error;
> +}
> +
> +/**
> + * st_stats_create_files - register files in the device
> + * statistics directory.
> + * @st: scsi_tape structure.
> + */
> +static void st_stats_create_files(struct scsi_tape *st)
> +{
> +       int i = 0, rval = 0;
> +
> +       while (tape_stats_attr_group[i] != NULL) {
> +               rval = st_stats_create_file(st, tape_stats_attr_group[i]);
> +               if (rval != 0)
> +                       st_stats_remove_file(st, tape_stats_attr_group[i]);
> +               i++;
> +       }
> +}
> +
> +/**
> + * st_stats_remove_files - remove the files from the statistics directory.
> + * @st: struct scsi_tape
> + */
> +
> +static void st_stats_remove_files(struct scsi_tape *st)
> +{
> +       int i = 0;
> +
> +       while (tape_stats_attr_group[i] != NULL) {
> +               st_stats_remove_file(st, tape_stats_attr_group[i]);
> +               i++;
> +       }
> +}
> +
> +/**
> + * st_tape_attr_store - call functions required to implement the store
> + * functionality. Works similar to show function.
> + * @kobj: struct kobject
> + * @attr: struct attribute
> + * @buf: data provided by caller
> + * @len: length of data provided
> + */
> +static ssize_t st_tape_attr_store(struct kobject *kobj, struct attribute *attr,
> +       const char *buf, size_t len)
> +{
> +       struct kobject *ktemp = kobj->parent;
> +       struct device *dev;
> +       struct scsi_tape *st;
> +       struct tape_stats_attr *st_attr = to_tape_stats_attr(attr);
> +       ssize_t ret = -EIO;
> +       int i = 0;
> +/* Make sure that we are being asked for an attribute we created */
> +       while (tape_stats_attr_group[i] != NULL) {
> +               if (tape_stats_attr_group[i] == st_attr)
> +                       break;
> +               i++;
> +       }
> +       if (tape_stats_attr_group[i] == NULL)
> +               return ret;
> +
> +       if ((!st_attr->store) || (ktemp == 0))
> +               return ret;
> +       dev = kobj_to_dev(ktemp);
> +       if (dev == 0)
> +               return ret;
> +
> +       mutex_lock(&st_ref_mutex);
> +       st = dev_get_drvdata(dev);
> +       if (st == 0) {
> +               mutex_unlock(&st_ref_mutex);
> +               return ret;
> +       }
> +       kref_get(&st->kref);
> +       mutex_unlock(&st_ref_mutex);
> +
> +       if ((st->stats != NULL) && (st_attr->store))
> +               ret = st_attr->store(st, buf, len);
> +
> +       mutex_lock(&st_ref_mutex);
> +       kref_put(&st->kref, scsi_tape_release);
> +       mutex_unlock(&st_ref_mutex);
> +
> +       return ret;
> +}
> +
> +/**
> + * st_tape_attr_show - call the functions that provide the statistics.
> + * This function makes sure that the struct scsi_tape being referred to is
> + * current and has not been deleted (e.g. during an unload of the st
> + * driver or the tape drive disappearing).
> + * @kobj: struct kobject
> + * @attr: struct attribute
> + * @buf: data being returned to caller
> + */
> +static ssize_t st_tape_attr_show(struct kobject *kobj, struct attribute *attr,
> +       char *buf)
> +{
> +       struct kobject *ktemp = kobj->parent;
> +       struct device *dev;
> +       struct scsi_tape *st;
> +       struct tape_stats_attr *st_attr = to_tape_stats_attr(attr);
> +       ssize_t ret = -EIO;
> +       int i = 0;
> +/* Make sure that we are being asked for an attribute we created */
> +       while (tape_stats_attr_group[i] != NULL) {
> +               if (tape_stats_attr_group[i] == st_attr)
> +                       break;
> +               i++;
> +       }
> +       if (tape_stats_attr_group[i] == NULL)
> +               return ret;
> +/* kobject passed in is for the statistics directory. We need to look at the
> +   parent kobject (part of a struct device) and from there get the struct
> +   scsi_tape. If no show function return error as well. */
> +       if ((!st_attr->show) || (ktemp == 0) || (buf == 0))
> +               return ret;
> +       dev = kobj_to_dev(ktemp);
> +       if (dev == 0)
> +               return ret;
> +/* dev_get_drvdata must return 0 if the struct scsi_tape has been freed.
> +   Holding st_ref_mutex means it cannot be freed while we check and we
> +   grab a reference to the struct scsi_tape before unlocking. */
> +       mutex_lock(&st_ref_mutex);
> +       st = dev_get_drvdata(dev);
> +       if (st == 0) {
> +               mutex_unlock(&st_ref_mutex);
> +               return ret;
> +       }
> +       kref_get(&st->kref);
> +       mutex_unlock(&st_ref_mutex);
> +
> +       if ((st->stats != NULL) && (st_attr->show))
> +               ret = st_attr->show(st, buf);
> +
> +       mutex_lock(&st_ref_mutex);
> +       kref_put(&st->kref, scsi_tape_release);
> +       mutex_unlock(&st_ref_mutex);
> +
> +       return ret;
> +}
> +
> +
> +
>  /* The following functions may be useful for a larger audience. */
>  static int sgl_map_user_pages(struct st_buffer *STbp,
>                               const unsigned int max_pages, unsigned long uaddr,
> diff -uprN a/drivers/scsi/st.h b/drivers/scsi/st.h
> --- a/drivers/scsi/st.h 2015-01-11 14:46:00.243814755 -0600
> +++ b/drivers/scsi/st.h 2015-01-11 18:04:54.714001944 -0600
> @@ -92,6 +92,32 @@ struct st_partstat {
>         int drv_file;
>  };
>
> +/* Tape statistics */
> +struct scsi_tape_stats {
> +       struct kobject statistics; /* Object for statistics directory */
> +       u64 read_byte_cnt;      /* bytes read */
> +       u64 write_byte_cnt;     /* bytes written */
> +       u64 in_flight;          /* Number of I/Os in flight */
> +       u64 read_cnt;           /* Count of read requests */
> +       u64 write_cnt;          /* Count of write requests */
> +       u64 other_cnt;          /* Count of other requests either implicit
> +                                       or from user space via ioctl. */
> +       u64 read_ticks;         /* Ticks spent completing read requests */
> +       u64 write_ticks;        /* Ticks spent completing write requests */
> +       u64 io_ticks;           /* Ticks spent doing any I/O */
> +       u64 stamp;              /* holds time request was queued */
> +       u64 sync;               /* Are stats read in sync */
> +       u64 sync_read_byte_cnt;
> +       u64 sync_write_byte_cnt;
> +       u64 sync_in_flight;
> +       u64 sync_read_cnt;
> +       u64 sync_write_cnt;
> +       u64 sync_other_cnt;
> +       u64 sync_read_ticks;
> +       u64 sync_write_ticks;
> +       u64 sync_io_ticks;
> +};
> +
>  #define ST_NBR_PARTITIONS 4
>
>  /* The tape drive descriptor */
> @@ -171,6 +197,7 @@ struct scsi_tape {
>  #endif
>         struct gendisk *disk;
>         struct kref     kref;
> +       struct scsi_tape_stats *stats;
>  };
>
>  /* Bit masks for use_pf */
> diff -uprN a/Documentation/scsi/st.txt b/Documentation/scsi/st.txt
> --- a/Documentation/scsi/st.txt 2015-01-11 14:45:56.235815028 -0600
> +++ b/Documentation/scsi/st.txt 2015-01-12 11:23:37.885398363 -0600
> @@ -151,6 +151,98 @@ A link named 'tape' is made from the SCS
>  directory corresponding to the mode 0 auto-rewind device (e.g., st0).
>
>
> +SYSFS AND STATISTICS FOR TAPE DEVICES
> +
> +The st driver maintains statistics for tape drives inside the sysfs filesystem.
> +The following method can be used to locate the statistics directories that are
> +available (assuming that sysfs is mounted at /sys):
> +
> +1. Use opendir(3) on the directory /sys/class/scsi_tape
> +2. Use readdir(3) to read the directory contents
> +3. Use regcomp(3)/regexec(3) to match directory entries to the extended
> +        regular expression "^st[0-9]+$"
> +4. Access the statistics from the /sys/class/scsi_tape/<match>/device/statistics
> +        directory (where <match> is a directory entry from /sys/class/scsi_tape
> +        that matched the extended regular expression)
> +
> +An example of a matching directory with the full path to the statistics
> +directory would be:
> +
> +/sys/class/scsi_tape/st0/device/statistics
> +
> +This is not the real path to the statistics directory but it does represent a
> +path which makes the directories easiest to find. The statistics directory is
> +common to all tape devices associated with a SCSI tape drive the statistics
> +will be the same for the st0, nst0, etc.
> +
> +The statistics directory contains the following files:
> +
> +1.  in_flight - The number of I/Os currently outstanding to this device.
> +2.  io_ms - The amount of time spent waiting (in milliseconds) for all I/O
> +        to complete (including read and write). This includes tape movement
> +        commands such as seeking between file or set marks and implicit tape
> +        movement such as when rewind on close tape devices are used.
> +3.  other_cnt - The number of I/Os issued to the tape drive other than read or
> +        write commands. The time taken to complete these commands is tracked
> +        in io_ms.
> +4.  read_byte_cnt - The number of bytes read from the tape drive.
> +5.  read_cnt - The number of read requests issued to the tape drive.
> +6.  read_ms - The amount of time (in milliseconds) spent waiting for read
> +        requests to complete.
> +7.  sync - Described below
> +8.  write_byte_cnt - The number of bytes written to the tape drive.
> +9.  write_cnt - The number of write requests issued to the tape drive.
> +10. write_ms - The amount of time (in milliseconds) spent waiting for write
> +        requests to complete.
> +
> +Note: The count statistics are incrememented at the start of an I/O but the
> +time they take to complete is not added to the statistics until they complete.
> +
> +When read all of the statistics may be out of sync. That is they may not be
> +on consistent set of statistics because of the amount of time taken to
> +open/read/close each statistics file if the tape drive is in use.
> +To alleviate that issue and try to make a set of consistent statistics
> +the sync file was created.
> +
> +When a non-zero value is written into the sync file all of the statistics
> +are saved. Until a new value is written into the sync file accesses to the
> +statistics files will return the saved values instead of the current values
> +(writing the same value as previously written causes the statistics to be
> +saved again). Writing 0 makes the statistics files provides access to the
> +current instead of the saved values.
> +
> +The following is the suggested way to access the statistics when you require
> +a consistent set of statistics:
> +
> +1. Determine the location of the statistics directory
> +2. Open the sync file and write a set value into the file (choose one value
> +        that your application uses always - doing so will allow someone else
> +        to determine who else is using the sync file)
> +3. Open, read, then close the individual statistics files
> +4. Read back the sync value and ensure you read back the value that you wrote.
> +If the value read back is not the same write your chosen value back into the
> +sync file and then re-read the statistics. If the sync value has changed again
> +stop after 3 attempts and just use the values you gathered. There should not
> +be that many users of the statistics that a collision is likely to happen.
> +5. Always write 0 back into the sync file when you are done then close it.
> +
> +If you wish to manually view the statistics you can do so in the following
> +manner:
> +
> +$ cd /sys/class/scsi_tape/st0/device/statistics
> +$ grep [0-9] *
> +in_flight:0
> +io_ms:33263
> +other_cnt:5
> +read_byte_cnt:0
> +read_cnt:0
> +read_ms:0
> +sync:0
> +write_byte_cnt:104857600
> +write_cnt:102400
> +write_ms:24069
> +
> +
>  BSD AND SYS V SEMANTICS
>
>  The user can choose between these two behaviours of the tape driver by
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] st: implement sysfs based tape statistics v2
  2015-02-02 15:16 ` Laurence Oberman
@ 2015-02-05 17:03   ` "Kai Mäkisara (Kolumbus)"
  2015-02-05 17:40     ` Laurence Oberman
  0 siblings, 1 reply; 17+ messages in thread
From: "Kai Mäkisara (Kolumbus)" @ 2015-02-05 17:03 UTC (permalink / raw)
  To: Laurence Oberman
  Cc: Seymour, Shane M, loberman@redhat.com, linux-scsi@vger.kernel.org,
	James E.J. Bottomley (JBottomley@parallels.com), jeffm@suse.com


> On 2.2.2015, at 17.16, Laurence Oberman <oberman.l@gmail.com> wrote:
> 
> I pulled this this morning and will be testing. The prior version was
> stable for me on the upstream and RHEL 6.5 kernel without exhaustive
> testing.
> We also just received more requests to get this into RHEL from HP /
> Red Hat customers.
> 
> Kai, what are your thoughts. I realize this is a large amount of
> additional code. I am not keen to create a driver just for stats as we
> would have to keep the rest of the st driver changes always in sync.
> 

I still think that the tape statistics should be exported like the statistics of “real” block devices, i.e., one sysfs file exporting on a single line the statistics that temporally belong together. James rejected this approach. I am leaving the decision about this code to him. I will neither ack nor nak this code.

Thanks,
Kai

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] st: implement sysfs based tape statistics v2
  2015-02-05 17:03   ` "Kai Mäkisara (Kolumbus)"
@ 2015-02-05 17:40     ` Laurence Oberman
  2015-02-05 17:46       ` "Kai Mäkisara (Kolumbus)"
  0 siblings, 1 reply; 17+ messages in thread
From: Laurence Oberman @ 2015-02-05 17:40 UTC (permalink / raw)
  To: Kai Mäkisara (Kolumbus)
  Cc: Laurence Oberman, Shane M Seymour, linux-scsi,
	James E.J. Bottomley (JBottomley@parallels.com), jeffm

----- Original Message -----
From: "Kai Mäkisara (Kolumbus)" <kai.makisara@kolumbus.fi>
To: "Laurence Oberman" <oberman.l@gmail.com>
Cc: "Shane M Seymour" <shane.seymour@hp.com>, loberman@redhat.com, linux-scsi@vger.kernel.org, "James E.J. Bottomley (JBottomley@parallels.com)" <JBottomley@parallels.com>, jeffm@suse.com
Sent: Thursday, February 5, 2015 12:03:29 PM
Subject: Re: [PATCH] st: implement sysfs based tape statistics v2


> On 2.2.2015, at 17.16, Laurence Oberman <oberman.l@gmail.com> wrote:
> 
> I pulled this this morning and will be testing. The prior version was
> stable for me on the upstream and RHEL 6.5 kernel without exhaustive
> testing.
> We also just received more requests to get this into RHEL from HP /
> Red Hat customers.
> 
> Kai, what are your thoughts. I realize this is a large amount of
> additional code. I am not keen to create a driver just for stats as we
> would have to keep the rest of the st driver changes always in sync.
> 

I still think that the tape statistics should be exported like the statistics of “real” block devices, i.e., one sysfs file exporting on a single line the statistics that temporally belong together. James rejected this approach. I am leaving the decision about this code to him. I will neither ack nor nak this code.

Thanks,
Kai

Hello Kai,

I missed the earlier conversations with James, I will go search for them.
Do you mean add them so they are similar to the /proc/diskstats

cat /proc/diskstats
..
   8       0 sda 2258346 152801 291907067 5263795 388817 1518048 15013833 4542062 0 4794931 9803495
   8       1 sda1 717 102 26154 1179 8 2 80 76 0 1172 1254
   8       2 sda2 328 31 2872 1554 0 0 0 0 0 1554 1554
   8       3 sda3 2195205 151617 290898283 5203627 355053 1518046 15009528 4370598 0 4594137 9571937
   8       4 sda4 61921 1050 978350 57218 18 0 4225 34 0 56384 57185
  11       0 sr0 0 0 0 0 0 0 0 0 0 0 0
..


Laurence Oberman
Red Hat Global Support Service
SEG Team

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] st: implement sysfs based tape statistics v2
  2015-02-05 17:40     ` Laurence Oberman
@ 2015-02-05 17:46       ` "Kai Mäkisara (Kolumbus)"
  2015-02-05 18:12         ` Laurence Oberman
                           ` (2 more replies)
  0 siblings, 3 replies; 17+ messages in thread
From: "Kai Mäkisara (Kolumbus)" @ 2015-02-05 17:46 UTC (permalink / raw)
  To: Laurence Oberman
  Cc: Laurence Oberman, Shane M Seymour, linux-scsi,
	James E.J. Bottomley (JBottomley@parallels.com), jeffm


> On 5.2.2015, at 19.40, Laurence Oberman <loberman@redhat.com> wrote:
> 
> ----- Original Message -----
> From: "Kai Mäkisara (Kolumbus)" <kai.makisara@kolumbus.fi>
> To: "Laurence Oberman" <oberman.l@gmail.com>
> Cc: "Shane M Seymour" <shane.seymour@hp.com>, loberman@redhat.com, linux-scsi@vger.kernel.org, "James E.J. Bottomley (JBottomley@parallels.com)" <JBottomley@parallels.com>, jeffm@suse.com
> Sent: Thursday, February 5, 2015 12:03:29 PM
> Subject: Re: [PATCH] st: implement sysfs based tape statistics v2
> 
> 
>> On 2.2.2015, at 17.16, Laurence Oberman <oberman.l@gmail.com> wrote:
>> 
>> I pulled this this morning and will be testing. The prior version was
>> stable for me on the upstream and RHEL 6.5 kernel without exhaustive
>> testing.
>> We also just received more requests to get this into RHEL from HP /
>> Red Hat customers.
>> 
>> Kai, what are your thoughts. I realize this is a large amount of
>> additional code. I am not keen to create a driver just for stats as we
>> would have to keep the rest of the st driver changes always in sync.
>> 
> 
> I still think that the tape statistics should be exported like the statistics of “real” block devices, i.e., one sysfs file exporting on a single line the statistics that temporally belong together. James rejected this approach. I am leaving the decision about this code to him. I will neither ack nor nak this code.
> 
> Thanks,
> Kai
> 
> Hello Kai,
> 
> I missed the earlier conversations with James, I will go search for them.
> Do you mean add them so they are similar to the /proc/diskstats
> 
> cat /proc/diskstats
> ..
>   8       0 sda 2258346 152801 291907067 5263795 388817 1518048 15013833 4542062 0 4794931 9803495
>   8       1 sda1 717 102 26154 1179 8 2 80 76 0 1172 1254
>   8       2 sda2 328 31 2872 1554 0 0 0 0 0 1554 1554
>   8       3 sda3 2195205 151617 290898283 5203627 355053 1518046 15009528 4370598 0 4594137 9571937
>   8       4 sda4 61921 1050 978350 57218 18 0 4225 34 0 56384 57185
>  11       0 sr0 0 0 0 0 0 0 0 0 0 0 0
> ..
> 
Not exactly. I mean the data exported in sysfs, for example:

> cat /sys/block/sda/sda1/stat
  159740     9006  5941506    64461   124724    55907 12772208  3598677        0   299875  3663235

Kai

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] st: implement sysfs based tape statistics v2
  2015-02-05 17:46       ` "Kai Mäkisara (Kolumbus)"
@ 2015-02-05 18:12         ` Laurence Oberman
  2015-02-05 18:43         ` James Bottomley
  2015-02-05 18:50         ` Bryn M. Reeves
  2 siblings, 0 replies; 17+ messages in thread
From: Laurence Oberman @ 2015-02-05 18:12 UTC (permalink / raw)
  To: Kai Mäkisara (Kolumbus)
  Cc: Laurence Oberman, Shane M Seymour, linux-scsi,
	James E.J. Bottomley (JBottomley@parallels.com), jeffm

----- Original Message -----
From: "Kai Mäkisara (Kolumbus)" <kai.makisara@kolumbus.fi>
To: "Laurence Oberman" <loberman@redhat.com>
Cc: "Laurence Oberman" <oberman.l@gmail.com>, "Shane M Seymour" <shane.seymour@hp.com>, linux-scsi@vger.kernel.org, "James E.J. Bottomley (JBottomley@parallels.com)" <JBottomley@parallels.com>, jeffm@suse.com
Sent: Thursday, February 5, 2015 12:46:32 PM
Subject: Re: [PATCH] st: implement sysfs based tape statistics v2


> On 5.2.2015, at 19.40, Laurence Oberman <loberman@redhat.com> wrote:
> 
> ----- Original Message -----
> From: "Kai Mäkisara (Kolumbus)" <kai.makisara@kolumbus.fi>
> To: "Laurence Oberman" <oberman.l@gmail.com>
> Cc: "Shane M Seymour" <shane.seymour@hp.com>, loberman@redhat.com, linux-scsi@vger.kernel.org, "James E.J. Bottomley (JBottomley@parallels.com)" <JBottomley@parallels.com>, jeffm@suse.com
> Sent: Thursday, February 5, 2015 12:03:29 PM
> Subject: Re: [PATCH] st: implement sysfs based tape statistics v2
> 
> 
>> On 2.2.2015, at 17.16, Laurence Oberman <oberman.l@gmail.com> wrote:
>> 
>> I pulled this this morning and will be testing. The prior version was
>> stable for me on the upstream and RHEL 6.5 kernel without exhaustive
>> testing.
>> We also just received more requests to get this into RHEL from HP /
>> Red Hat customers.
>> 
>> Kai, what are your thoughts. I realize this is a large amount of
>> additional code. I am not keen to create a driver just for stats as we
>> would have to keep the rest of the st driver changes always in sync.
>> 
> 
> I still think that the tape statistics should be exported like the statistics of “real” block devices, i.e., one sysfs file exporting on a single line the statistics that temporally belong together. James rejected this approach. I am leaving the decision about this code to him. I will neither ack nor nak this code.
> 
> Thanks,
> Kai
> 
> Hello Kai,
> 
> I missed the earlier conversations with James, I will go search for them.
> Do you mean add them so they are similar to the /proc/diskstats
> 
> cat /proc/diskstats
> ..
>   8       0 sda 2258346 152801 291907067 5263795 388817 1518048 15013833 4542062 0 4794931 9803495
>   8       1 sda1 717 102 26154 1179 8 2 80 76 0 1172 1254
>   8       2 sda2 328 31 2872 1554 0 0 0 0 0 1554 1554
>   8       3 sda3 2195205 151617 290898283 5203627 355053 1518046 15009528 4370598 0 4594137 9571937
>   8       4 sda4 61921 1050 978350 57218 18 0 4225 34 0 56384 57185
>  11       0 sr0 0 0 0 0 0 0 0 0 0 0 0
> ..
> 
Not exactly. I mean the data exported in sysfs, for example:

> cat /sys/block/sda/sda1/stat
  159740     9006  5941506    64461   124724    55907 12772208  3598677        0   299875  3663235

Kai

Ok, Thanks, got it now. Let me circle back with Shane

Laurence Oberman
Red Hat Global Support Service
SEG Team
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] st: implement sysfs based tape statistics v2
  2015-02-05 17:46       ` "Kai Mäkisara (Kolumbus)"
  2015-02-05 18:12         ` Laurence Oberman
@ 2015-02-05 18:43         ` James Bottomley
  2015-02-05 18:50         ` Bryn M. Reeves
  2 siblings, 0 replies; 17+ messages in thread
From: James Bottomley @ 2015-02-05 18:43 UTC (permalink / raw)
  To: "Kai Mäkisara (Kolumbus)"
  Cc: Laurence Oberman, Laurence Oberman, Shane M Seymour, linux-scsi,
	jeffm

On Thu, 2015-02-05 at 19:46 +0200, "Kai Mäkisara (Kolumbus)" wrote:
> > On 5.2.2015, at 19.40, Laurence Oberman <loberman@redhat.com> wrote:
> > I missed the earlier conversations with James, I will go search for them.
> > Do you mean add them so they are similar to the /proc/diskstats
> > 
> > cat /proc/diskstats
> > ..
> >   8       0 sda 2258346 152801 291907067 5263795 388817 1518048 15013833 4542062 0 4794931 9803495
> >   8       1 sda1 717 102 26154 1179 8 2 80 76 0 1172 1254
> >   8       2 sda2 328 31 2872 1554 0 0 0 0 0 1554 1554
> >   8       3 sda3 2195205 151617 290898283 5203627 355053 1518046 15009528 4370598 0 4594137 9571937
> >   8       4 sda4 61921 1050 978350 57218 18 0 4225 34 0 56384 57185
> >  11       0 sr0 0 0 0 0 0 0 0 0 0 0 0
> > ..
> > 
> Not exactly. I mean the data exported in sysfs, for example:
> 
> > cat /sys/block/sda/sda1/stat
>   159740     9006  5941506    64461   124724    55907 12772208  3598677        0   299875  3663235

The problem is we're going to be attacked by the sysfs one value per
file crowd if we do something like this.  Block gets away with it
because it was grandfathered in.  It's only 12 files ... if there's a
good reason for having the fight with the sysfs people, we can; however,
I haven't seen a good reason yet, so I'd rather avoid creating a fuss
over something we're going to lose.

James


--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] st: implement sysfs based tape statistics v2
  2015-02-05 17:46       ` "Kai Mäkisara (Kolumbus)"
  2015-02-05 18:12         ` Laurence Oberman
  2015-02-05 18:43         ` James Bottomley
@ 2015-02-05 18:50         ` Bryn M. Reeves
  2015-02-05 18:55           ` James Bottomley
  2 siblings, 1 reply; 17+ messages in thread
From: Bryn M. Reeves @ 2015-02-05 18:50 UTC (permalink / raw)
  To: "Kai Mäkisara (Kolumbus)"
  Cc: Laurence Oberman, Laurence Oberman, Shane M Seymour, linux-scsi,
	James E.J. Bottomley (JBottomley@parallels.com), jeffm

On Thu, Feb 05, 2015 at 07:46:32PM +0200, "Kai Mäkisara (Kolumbus)" wrote:
> > On 5.2.2015, at 19.40, Laurence Oberman <loberman@redhat.com> wrote:
> > From: "Kai Mäkisara (Kolumbus)" <kai.makisara@kolumbus.fi>
> > I still think that the tape statistics should be exported like the statistics of “real” block devices, i.e., one sysfs file exporting on a single line the statistics that temporally belong together. James rejected this approach. I am leaving the decision about this code to him. I will neither ack nor nak this code.
> > 
> > I missed the earlier conversations with James, I will go search for them.
> > Do you mean add them so they are similar to the /proc/diskstats

http://comments.gmane.org/gmane.linux.scsi/80497

On Fri, Feb 22 2013 James Bottomley wrote:
>>>> I'm afraid we can't do it the way you're proposing.  files in sysfs must
>>>> conform to the one value per file rule (so we avoid the ABI nastiness
>>>> that plagues /proc).  You can create a stat directory with a bunch of
>>>> files, but not a single file that gives all values.

Documentation/filesystems/sysfs.txt does not agree:

  "Attributes should be ASCII text files, preferably with only one value
  per file. It is noted that it may not be efficient to contain only one
  value per file, so it is socially acceptable to express an array of
  values of the same type."

There's also ample precedent for this: sysfs disk and partition stats,
SELinux cache and hash table stats (which have a pretty yucky 2d int
array with column headers and a name: val format respectively).

There's also a bunch of multivariate name=value format stats files in
the cgroups sysfs tree.

> Not exactly. I mean the data exported in sysfs, for example:
> 
> > cat /sys/block/sda/sda1/stat
>   159740     9006  5941506    64461   124724    55907 12772208  3598677        0   299875  3663235

I'd prefer to consume tape stats in this format too; it follows the
principle of least surprise since it's shared with every other IO stats
source (including device-mapper statistics) and it simplifies handling
the counters in user space.

Regards,
Bryn.

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] st: implement sysfs based tape statistics v2
  2015-02-05 18:50         ` Bryn M. Reeves
@ 2015-02-05 18:55           ` James Bottomley
  2015-02-05 18:57             ` Bryn M. Reeves
  0 siblings, 1 reply; 17+ messages in thread
From: James Bottomley @ 2015-02-05 18:55 UTC (permalink / raw)
  To: Bryn M. Reeves
  Cc: "Kai Mäkisara (Kolumbus)", Laurence Oberman,
	Laurence Oberman, Shane M Seymour, linux-scsi, jeffm

On Thu, 2015-02-05 at 18:50 +0000, Bryn M. Reeves wrote:
> On Thu, Feb 05, 2015 at 07:46:32PM +0200, "Kai Mäkisara (Kolumbus)" wrote:
> > > On 5.2.2015, at 19.40, Laurence Oberman <loberman@redhat.com> wrote:
> > > From: "Kai Mäkisara (Kolumbus)" <kai.makisara@kolumbus.fi>
> > > I still think that the tape statistics should be exported like the statistics of “real” block devices, i.e., one sysfs file exporting on a single line the statistics that temporally belong together. James rejected this approach. I am leaving the decision about this code to him. I will neither ack nor nak this code.
> > > 
> > > I missed the earlier conversations with James, I will go search for them.
> > > Do you mean add them so they are similar to the /proc/diskstats
> 
> http://comments.gmane.org/gmane.linux.scsi/80497
> 
> On Fri, Feb 22 2013 James Bottomley wrote:
> >>>> I'm afraid we can't do it the way you're proposing.  files in sysfs must
> >>>> conform to the one value per file rule (so we avoid the ABI nastiness
> >>>> that plagues /proc).  You can create a stat directory with a bunch of
> >>>> files, but not a single file that gives all values.
> 
> Documentation/filesystems/sysfs.txt does not agree:
> 
>   "Attributes should be ASCII text files, preferably with only one value
>   per file. It is noted that it may not be efficient to contain only one
>   value per file, so it is socially acceptable to express an array of
>   values of the same type."
> 
> There's also ample precedent for this: sysfs disk and partition stats,
> SELinux cache and hash table stats (which have a pretty yucky 2d int
> array with column headers and a name: val format respectively).
> 
> There's also a bunch of multivariate name=value format stats files in
> the cgroups sysfs tree.
> 
> > Not exactly. I mean the data exported in sysfs, for example:
> > 
> > > cat /sys/block/sda/sda1/stat
> >   159740     9006  5941506    64461   124724    55907 12772208  3598677        0   299875  3663235
> 
> I'd prefer to consume tape stats in this format too; it follows the
> principle of least surprise since it's shared with every other IO stats
> source (including device-mapper statistics) and it simplifies handling
> the counters in user space.

OK, the sysfs bikeshedders hang out on linux-api

https://www.kernel.org/doc/man-pages/linux-api-ml.html

If you can convince them, we'll do the single file approach.

James


--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] st: implement sysfs based tape statistics v2
  2015-02-05 18:55           ` James Bottomley
@ 2015-02-05 18:57             ` Bryn M. Reeves
  0 siblings, 0 replies; 17+ messages in thread
From: Bryn M. Reeves @ 2015-02-05 18:57 UTC (permalink / raw)
  To: James Bottomley
  Cc: "Kai Mäkisara (Kolumbus)", Laurence Oberman,
	Laurence Oberman, Shane M Seymour, linux-scsi, jeffm

On Thu, Feb 05, 2015 at 10:55:50AM -0800, James Bottomley wrote:
> OK, the sysfs bikeshedders hang out on linux-api
> 
> https://www.kernel.org/doc/man-pages/linux-api-ml.html
> 
> If you can convince them, we'll do the single file approach.

Will do - I've got a couple of stats projects on the go at the moment so
this ties in well with that.

I'll sync up with Shane and see if he's interested in running the int array
version via the linux-api folks.

Regards,
Bryn.


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] st: implement sysfs based tape statistics v2
  2015-01-27  0:11 ` Seymour, Shane M
@ 2015-02-06 17:49   ` Jeremy Linton
  0 siblings, 0 replies; 17+ messages in thread
From: Jeremy Linton @ 2015-02-06 17:49 UTC (permalink / raw)
  To: Seymour, Shane M, linux-scsi@vger.kernel.org
  Cc: Kai.Makisara@kolumbus.fi,
	James E.J. Bottomley (JBottomley@parallels.com), jeffm@suse.com

On 1/26/2015 6:11 PM, Seymour, Shane M wrote:
> I was wondering if anyone had any feedback or had any chance to review the changes?

	Per the other discussion about having the same stat format forever. It seems to
me that you might want to preemptively add a few additional counters.
	
A counter for WRITE_FILEMARKS, particularly non immediate count=0 ones, which
are often used to flush the drive write buffer. A counter for movement related
commands like SPACE/LOCATE/REWIND would also be helpful. Finally, abnormal read
conditions like, ILI's, and hit FMs should have their own stat. Those three
should provide a better view into how the drive is being used and why
performance may not be what is expected.

There may be others, but those three are high on my list of things I want to
know about a tape stream that is not performing up to expectations.






^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] st: implement sysfs based tape statistics v2
  2015-01-13  3:43 [PATCH] st: implement sysfs based tape statistics v2 Seymour, Shane M
  2015-01-27  0:11 ` Seymour, Shane M
  2015-02-02 15:16 ` Laurence Oberman
@ 2015-02-08 17:07 ` Dale R. Worley
  2015-02-08 23:19   ` Seymour, Shane M
  2015-02-09  6:00   ` Seymour, Shane M
  2 siblings, 2 replies; 17+ messages in thread
From: Dale R. Worley @ 2015-02-08 17:07 UTC (permalink / raw)
  To: Seymour, Shane M; +Cc: linux-scsi

One feature of tape statistics is that they're as much about the *tape*
as they are about the *drive*, which is uncommon for block devices.  It
might be useful to have a set of counters which is cleared every time a
new tape is inserted into the drive.  In particular, "bad reads since
this tape was inserted" would be very useful for monitoring the quality
of tapes.

Dale

^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: [PATCH] st: implement sysfs based tape statistics v2
  2015-02-08 17:07 ` Dale R. Worley
@ 2015-02-08 23:19   ` Seymour, Shane M
  2015-02-11  3:38     ` Dale R. Worley
  2015-02-09  6:00   ` Seymour, Shane M
  1 sibling, 1 reply; 17+ messages in thread
From: Seymour, Shane M @ 2015-02-08 23:19 UTC (permalink / raw)
  To: Dale R. Worley; +Cc: linux-scsi@vger.kernel.org

Both of those things would have to be futures and require discussion - the very original version cleared stats on a device open but I got asked to keep it the stats cumulative so they would be more similar to disk stats. I'm not sure about the bad reads idea I'd have to look and see if some other layer provided device error information. I've got some changes to make and don't want to change it into a moving target that needs more discussion at this point.

-----Original Message-----
From: Dale R. Worley [mailto:worley@alum.mit.edu] 
Sent: Monday, February 09, 2015 4:08 AM
To: Seymour, Shane M
Cc: linux-scsi@vger.kernel.org
Subject: Re: [PATCH] st: implement sysfs based tape statistics v2

One feature of tape statistics is that they're as much about the *tape* as they are about the *drive*, which is uncommon for block devices.  It might be useful to have a set of counters which is cleared every time a new tape is inserted into the drive.  In particular, "bad reads since this tape was inserted" would be very useful for monitoring the quality of tapes.

Dale

^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: [PATCH] st: implement sysfs based tape statistics v2
  2015-02-08 17:07 ` Dale R. Worley
  2015-02-08 23:19   ` Seymour, Shane M
@ 2015-02-09  6:00   ` Seymour, Shane M
  2015-02-09 19:14     ` "Kai Mäkisara (Kolumbus)"
  1 sibling, 1 reply; 17+ messages in thread
From: Seymour, Shane M @ 2015-02-09  6:00 UTC (permalink / raw)
  To: Dale R. Worley, Kai.Makisara@kolumbus.fi; +Cc: linux-scsi@vger.kernel.org

Kai - see last 3 paragraphs for question about if something is a bug or not.

BTW I had a look - I couldn't quickly find out if there was a way to tell if the medium has changed in a tape drive (there could be something device specific). At the device level there's a record of I/O errors:

[root@tapedrive device]# pwd
/sys/class/scsi_tape/st0/device
[root@tapedrive device]# cat ioerr_cnt
0x5

That doesn't distinguish between read or write or any other kind of error though - it doesn't even really have to mean an error since reading with a block size too small also increments the error count:

[root@tapedrive tape-stats]# ./tape_exercise write /dev/st0 /dev/urandom 1000000
About to write from /dev/urandom to /dev/st0, max size = 1000000, blksize = 4096
Write complete on /dev/st0 after 1003520 bytes
./tape_[root@tapedrive tape-stats]# ./tape_exercise read /dev/st0
About to read from /dev/st0 blksize = 256
Failed to read from /dev/st0 using current blksize, will try using a bigger blksize
About to read from /dev/st0 blksize = 512
Failed to read from /dev/st0 using current blksize, will try using a bigger blksize
About to read from /dev/st0 blksize = 1024
Failed to read from /dev/st0 using current blksize, will try using a bigger blksize
About to read from /dev/st0 blksize = 2048
Failed to read from /dev/st0 using current blksize, will try using a bigger blksize
About to read from /dev/st0 blksize = 4096
Reading complete for /dev/st0, 987136 bytes read

[root@tapedrive tape-stats]# cd /sys/class/scsi_tape/st0/device
[root@tapedrive device]# cat ioerr_cnt
0xa

It would seem that ioerr_cnt is probably a count of SCSI check conditions encountered?

Unfortunately for the MTIOCGET ioctl the value of mt_resid is the partition number of the tape not what I would have expected it to be - the residual left after the last read or write that returned an error (and 0 if the last read/write didn't return an error).

Kai - is that a bug? Shouldn't mt_resid be the residual from the last failed read or write indicating how much data didn't make it to the device and 0 on a successful read or write? I've used mt_resid from MTIOCGET on HP-UX previously when issuing reads and repositioning and retrying tape reads when starting with too low a block size to try and automatically determine the block size in use (it's never a good idea to under or overread tape blocks because it always results in check conditions and in the st driver under reading the block size always creates messages in dmesg).

If you don't have time to look at it I may look at if it's possible to provide the correct mt_resid for MTIOCGET - assuming that a long time if misuse prevents us from correcting it. If there's no way to export a partition number for the devices that support it I can add a new sysfs file (call it partition) to export it that way and see if I can get the correct value into mt_resid.

-----Original Message-----
From: Seymour, Shane M 
Sent: Monday, February 09, 2015 10:19 AM
To: 'Dale R. Worley'
Cc: linux-scsi@vger.kernel.org
Subject: RE: [PATCH] st: implement sysfs based tape statistics v2

Both of those things would have to be futures and require discussion - the very original version cleared stats on a device open but I got asked to keep it the stats cumulative so they would be more similar to disk stats. I'm not sure about the bad reads idea I'd have to look and see if some other layer provided device error information. I've got some changes to make and don't want to change it into a moving target that needs more discussion at this point.

-----Original Message-----
From: Dale R. Worley [mailto:worley@alum.mit.edu] 
Sent: Monday, February 09, 2015 4:08 AM
To: Seymour, Shane M
Cc: linux-scsi@vger.kernel.org
Subject: Re: [PATCH] st: implement sysfs based tape statistics v2

One feature of tape statistics is that they're as much about the *tape* as they are about the *drive*, which is uncommon for block devices.  It might be useful to have a set of counters which is cleared every time a new tape is inserted into the drive.  In particular, "bad reads since this tape was inserted" would be very useful for monitoring the quality of tapes.

Dale

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] st: implement sysfs based tape statistics v2
  2015-02-09  6:00   ` Seymour, Shane M
@ 2015-02-09 19:14     ` "Kai Mäkisara (Kolumbus)"
  0 siblings, 0 replies; 17+ messages in thread
From: "Kai Mäkisara (Kolumbus)" @ 2015-02-09 19:14 UTC (permalink / raw)
  To: Seymour, Shane M; +Cc: Dale R. Worley, linux-scsi@vger.kernel.org


> On 9.2.2015, at 8.00, Seymour, Shane M <shane.seymour@hp.com> wrote:
> 
> Kai - see last 3 paragraphs for question about if something is a bug or not.
> 
> BTW I had a look - I couldn't quickly find out if there was a way to tell if the medium has changed in a tape drive (there could be something device specific). At the device level there's a record of I/O errors:
> 
> [root@tapedrive device]# pwd
> /sys/class/scsi_tape/st0/device
> [root@tapedrive device]# cat ioerr_cnt
> 0x5
>  
> That doesn't distinguish between read or write or any other kind of error though - it doesn't even really have to mean an error since reading with a block size too small also increments the error count:

The counts in the device directory are not specific to tapes. From linux/scsi/scsi_lib.c one can see that ierr_cnt is incremented every time the scsi call returns any kind of error, including when the tape drive wants to return some information with sense data.

> [root@tapedrive tape-stats]# ./tape_exercise write /dev/st0 /dev/urandom 1000000
> About to write from /dev/urandom to /dev/st0, max size = 1000000, blksize = 4096
> Write complete on /dev/st0 after 1003520 bytes
> ./tape_[root@tapedrive tape-stats]# ./tape_exercise read /dev/st0
> About to read from /dev/st0 blksize = 256
> Failed to read from /dev/st0 using current blksize, will try using a bigger blksize
> About to read from /dev/st0 blksize = 512
> Failed to read from /dev/st0 using current blksize, will try using a bigger blksize
> About to read from /dev/st0 blksize = 1024
> Failed to read from /dev/st0 using current blksize, will try using a bigger blksize
> About to read from /dev/st0 blksize = 2048
> Failed to read from /dev/st0 using current blksize, will try using a bigger blksize
> About to read from /dev/st0 blksize = 4096
> Reading complete for /dev/st0, 987136 bytes read
> 
> [root@tapedrive tape-stats]# cd /sys/class/scsi_tape/st0/device
> [root@tapedrive device]# cat ioerr_cnt
> 0xa
> 
> It would seem that ioerr_cnt is probably a count of SCSI check conditions encountered?
> 
> Unfortunately for the MTIOCGET ioctl the value of mt_resid is the partition number of the tape not what I would have expected it to be - the residual left after the last read or write that returned an error (and 0 if the last read/write didn't return an error).
> 
> Kai - is that a bug? Shouldn't mt_resid be the residual from the last failed read or write indicating how much data didn't make it to the device and 0 on a successful read or write? I've used mt_resid from MTIOCGET on HP-UX previously when issuing reads and repositioning and retrying tape reads when starting with too low a block size to try and automatically determine the block size in use (it's never a good idea to under or overread tape blocks because it always results in check conditions and in the st driver under reading the block size always creates messages in dmesg).
> 
This is not a bug. man st documents that mt_resid does return the partition number. In the different unix systems the field did not consistently return the residual count. The Linux SCSI drivers did not at that time return the residual. These are reasons why the field is used for partition number.

For writes in any real situation (drive in buffered mode) you don’t know when the write() returns whether the data can be written to tape. All writes are on tape when write file marks returns successfully of the device is successfully closed. I am not sure that the amount of data successfully written is really useful. If I see a write error, I want to rewrite at least the latest file.

For reads, there are other ways to determine the block size. (Use a large enough byte count and see what you get.)

The st driver does not write to the log if the block is shorter than requested. Short read is logged (together with the real block size). If you don’t want the overhead of returning the sense data for short reads, you can set the SILI flag.

> If you don't have time to look at it I may look at if it's possible to provide the correct mt_resid for MTIOCGET - assuming that a long time if misuse prevents us from correcting it. If there's no way to export a partition number for the devices that support it I can add a new sysfs file (call it partition) to export it that way and see if I can get the correct value into mt_resid.
> 
Changing the definition of a field should not be done. There is software that is correctly using the field as it is documented.

Kai

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] st: implement sysfs based tape statistics v2
  2015-02-08 23:19   ` Seymour, Shane M
@ 2015-02-11  3:38     ` Dale R. Worley
  0 siblings, 0 replies; 17+ messages in thread
From: Dale R. Worley @ 2015-02-11  3:38 UTC (permalink / raw)
  To: Seymour, Shane M; +Cc: linux-scsi

"Seymour, Shane M" <shane.seymour@hp.com> writes:
> Both of those things would have to be futures and require discussion -
> the very original version cleared stats on a device open but I got
> asked to keep it the stats cumulative so they would be more similar to
> disk stats.

Yes, this is a futures question.  But in a perfect world, there'd be two
sets of statistics, one cumulative since the last reboot and one that
was cleared every time a tape was unloaded.

Dale

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2015-02-11  3:38 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-01-13  3:43 [PATCH] st: implement sysfs based tape statistics v2 Seymour, Shane M
2015-01-27  0:11 ` Seymour, Shane M
2015-02-06 17:49   ` Jeremy Linton
2015-02-02 15:16 ` Laurence Oberman
2015-02-05 17:03   ` "Kai Mäkisara (Kolumbus)"
2015-02-05 17:40     ` Laurence Oberman
2015-02-05 17:46       ` "Kai Mäkisara (Kolumbus)"
2015-02-05 18:12         ` Laurence Oberman
2015-02-05 18:43         ` James Bottomley
2015-02-05 18:50         ` Bryn M. Reeves
2015-02-05 18:55           ` James Bottomley
2015-02-05 18:57             ` Bryn M. Reeves
2015-02-08 17:07 ` Dale R. Worley
2015-02-08 23:19   ` Seymour, Shane M
2015-02-11  3:38     ` Dale R. Worley
2015-02-09  6:00   ` Seymour, Shane M
2015-02-09 19:14     ` "Kai Mäkisara (Kolumbus)"

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).