* [PATCH 1/9] stat: reduce arguments of add_*lat_sample() functions
2024-08-27 7:08 [PATCH 0/9] introduce the log_issue_time option Shin'ichiro Kawasaki
@ 2024-08-27 7:08 ` Shin'ichiro Kawasaki
2024-08-27 7:08 ` [PATCH 2/9] stat: reduce arguments of add_log_sample() Shin'ichiro Kawasaki
` (8 subsequent siblings)
9 siblings, 0 replies; 25+ messages in thread
From: Shin'ichiro Kawasaki @ 2024-08-27 7:08 UTC (permalink / raw)
To: fio, Jens Axboe, Vincent Fu; +Cc: Damien Le Moal, Shin'ichiro Kawasaki
The functions add_clat_sample(), add_lat_sample() and add_slat_sample()
have a rather large number of arguments, and some of the arguments are
members of the struct io_u. Pass io_u instead of those arguments to
reduce the number of arguments.
Some add_clat_sample() callers in engines/fileoperations.c do not have
io_u reference, then pass NULL instead of the io_u. This indicates to
use 0 values instead of the io_u fields.
Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
---
engines/fileoperations.c | 6 +++---
io_u.c | 14 +++-----------
stat.c | 35 +++++++++++++++++++++++------------
stat.h | 13 +++++++------
4 files changed, 36 insertions(+), 32 deletions(-)
diff --git a/engines/fileoperations.c b/engines/fileoperations.c
index c52f0900..e5303359 100644
--- a/engines/fileoperations.c
+++ b/engines/fileoperations.c
@@ -129,7 +129,7 @@ static int open_file(struct thread_data *td, struct fio_file *f)
uint64_t nsec;
nsec = ntime_since_now(&start);
- add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0, 0);
+ add_clat_sample(td, data->stat_ddir, nsec, 0, NULL);
}
return 0;
@@ -200,7 +200,7 @@ static int stat_file(struct thread_data *td, struct fio_file *f)
uint64_t nsec;
nsec = ntime_since_now(&start);
- add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0, 0);
+ add_clat_sample(td, data->stat_ddir, nsec, 0, NULL);
}
return 0;
@@ -250,7 +250,7 @@ static int delete_file(struct thread_data *td, struct fio_file *f)
uint64_t nsec;
nsec = ntime_since_now(&start);
- add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0, 0);
+ add_clat_sample(td, data->stat_ddir, nsec, 0, NULL);
}
return 0;
diff --git a/io_u.c b/io_u.c
index 40b09082..c49cd4df 100644
--- a/io_u.c
+++ b/io_u.c
@@ -2016,8 +2016,7 @@ static void account_io_completion(struct thread_data *td, struct io_u *io_u,
unsigned long long tnsec;
tnsec = ntime_since(&io_u->start_time, &icd->time);
- add_lat_sample(td, idx, tnsec, bytes, io_u->offset,
- io_u->ioprio, io_u->clat_prio_index);
+ add_lat_sample(td, idx, tnsec, bytes, io_u);
if (td->flags & TD_F_PROFILE_OPS) {
struct prof_io_ops *ops = &td->prof_io_ops;
@@ -2038,8 +2037,7 @@ static void account_io_completion(struct thread_data *td, struct io_u *io_u,
if (ddir_rw(idx)) {
if (!td->o.disable_clat) {
- add_clat_sample(td, idx, llnsec, bytes, io_u->offset,
- io_u->ioprio, io_u->clat_prio_index);
+ add_clat_sample(td, idx, llnsec, bytes, io_u);
io_u_mark_latency(td, llnsec);
}
@@ -2301,15 +2299,9 @@ int io_u_queued_complete(struct thread_data *td, int min_evts)
void io_u_queued(struct thread_data *td, struct io_u *io_u)
{
if (!td->o.disable_slat && ramp_time_over(td) && td->o.stats) {
- unsigned long slat_time;
-
- slat_time = ntime_since(&io_u->start_time, &io_u->issue_time);
-
if (td->parent)
td = td->parent;
-
- add_slat_sample(td, io_u->ddir, slat_time, io_u->xfer_buflen,
- io_u->offset, io_u->ioprio);
+ add_slat_sample(td, io_u);
}
}
diff --git a/stat.c b/stat.c
index b98e8b27..fac6d534 100644
--- a/stat.c
+++ b/stat.c
@@ -3297,17 +3297,25 @@ add_lat_percentile_prio_sample(struct thread_stat *ts, unsigned long long nsec,
void add_clat_sample(struct thread_data *td, enum fio_ddir ddir,
unsigned long long nsec, unsigned long long bs,
- uint64_t offset, unsigned int ioprio,
- unsigned short clat_prio_index)
+ struct io_u *io_u)
{
const bool needs_lock = td_async_processing(td);
unsigned long elapsed, this_window;
struct thread_stat *ts = &td->ts;
struct io_log *iolog = td->clat_hist_log;
+ uint64_t offset = 0;
+ unsigned int ioprio = 0;
+ unsigned short clat_prio_index = 0;
if (needs_lock)
__td_io_u_lock(td);
+ if (io_u) {
+ offset = io_u->offset;
+ ioprio = io_u->ioprio;
+ clat_prio_index = io_u->clat_prio_index;
+ }
+
add_stat_sample(&ts->clat_stat[ddir], nsec);
/*
@@ -3381,24 +3389,27 @@ void add_clat_sample(struct thread_data *td, enum fio_ddir ddir,
__td_io_u_unlock(td);
}
-void add_slat_sample(struct thread_data *td, enum fio_ddir ddir,
- unsigned long long nsec, unsigned long long bs,
- uint64_t offset, unsigned int ioprio)
+void add_slat_sample(struct thread_data *td, struct io_u *io_u)
{
const bool needs_lock = td_async_processing(td);
struct thread_stat *ts = &td->ts;
+ enum fio_ddir ddir;
+ unsigned long long nsec;
+ ddir = io_u->ddir;
if (!ddir_rw(ddir))
return;
if (needs_lock)
__td_io_u_lock(td);
+ nsec = ntime_since(&io_u->start_time, &io_u->issue_time);
+
add_stat_sample(&ts->slat_stat[ddir], nsec);
if (td->slat_log)
- add_log_sample(td, td->slat_log, sample_val(nsec), ddir, bs,
- offset, ioprio);
+ add_log_sample(td, td->slat_log, sample_val(nsec), ddir,
+ io_u->xfer_buflen, io_u->offset, io_u->ioprio);
if (ts->slat_percentiles)
add_lat_percentile_sample(ts, nsec, ddir, FIO_SLAT);
@@ -3409,8 +3420,7 @@ void add_slat_sample(struct thread_data *td, enum fio_ddir ddir,
void add_lat_sample(struct thread_data *td, enum fio_ddir ddir,
unsigned long long nsec, unsigned long long bs,
- uint64_t offset, unsigned int ioprio,
- unsigned short clat_prio_index)
+ struct io_u * io_u)
{
const bool needs_lock = td_async_processing(td);
struct thread_stat *ts = &td->ts;
@@ -3425,7 +3435,7 @@ void add_lat_sample(struct thread_data *td, enum fio_ddir ddir,
if (td->lat_log)
add_log_sample(td, td->lat_log, sample_val(nsec), ddir, bs,
- offset, ioprio);
+ io_u->offset, io_u->ioprio);
/*
* When lat_percentiles=1 (default 0), the reported per priority
@@ -3439,8 +3449,9 @@ void add_lat_sample(struct thread_data *td, enum fio_ddir ddir,
*/
if (ts->lat_percentiles) {
add_lat_percentile_sample(ts, nsec, ddir, FIO_LAT);
- add_lat_percentile_prio_sample(ts, nsec, ddir, clat_prio_index);
- add_stat_prio_sample(ts->clat_prio[ddir], clat_prio_index,
+ add_lat_percentile_prio_sample(ts, nsec, ddir,
+ io_u->clat_prio_index);
+ add_stat_prio_sample(ts->clat_prio[ddir], io_u->clat_prio_index,
nsec);
}
if (needs_lock)
diff --git a/stat.h b/stat.h
index 0d57cceb..ac74d6c2 100644
--- a/stat.h
+++ b/stat.h
@@ -366,12 +366,13 @@ extern void reset_io_stats(struct thread_data *);
extern void update_rusage_stat(struct thread_data *);
extern void clear_rusage_stat(struct thread_data *);
-extern void add_lat_sample(struct thread_data *, enum fio_ddir, unsigned long long,
- unsigned long long, uint64_t, unsigned int, unsigned short);
-extern void add_clat_sample(struct thread_data *, enum fio_ddir, unsigned long long,
- unsigned long long, uint64_t, unsigned int, unsigned short);
-extern void add_slat_sample(struct thread_data *, enum fio_ddir, unsigned long long,
- unsigned long long, uint64_t, unsigned int);
+extern void add_lat_sample(struct thread_data *, enum fio_ddir,
+ unsigned long long, unsigned long long,
+ struct io_u *);
+extern void add_clat_sample(struct thread_data *, enum fio_ddir,
+ unsigned long long, unsigned long long,
+ struct io_u *);
+extern void add_slat_sample(struct thread_data *, struct io_u *);
extern void add_agg_sample(union io_sample_data, enum fio_ddir, unsigned long long);
extern void add_iops_sample(struct thread_data *, struct io_u *,
unsigned int);
--
2.45.2
^ permalink raw reply related [flat|nested] 25+ messages in thread* [PATCH 2/9] stat: reduce arguments of add_log_sample()
2024-08-27 7:08 [PATCH 0/9] introduce the log_issue_time option Shin'ichiro Kawasaki
2024-08-27 7:08 ` [PATCH 1/9] stat: reduce arguments of add_*lat_sample() functions Shin'ichiro Kawasaki
@ 2024-08-27 7:08 ` Shin'ichiro Kawasaki
2024-08-27 7:08 ` [PATCH 3/9] iolog: refactor flush_samples() Shin'ichiro Kawasaki
` (7 subsequent siblings)
9 siblings, 0 replies; 25+ messages in thread
From: Shin'ichiro Kawasaki @ 2024-08-27 7:08 UTC (permalink / raw)
To: fio, Jens Axboe, Vincent Fu; +Cc: Damien Le Moal, Shin'ichiro Kawasaki
The number of arguments of add_log_sample() has increased as fields get
added to the log file format. Five parameters of them (data, ddir, bs,
offset and priority) are passed to __add_log_sample(). This makes the
function look more complicated and log field addition harder. To
simplify the function, pack the five arguments into the new struct
log_sample.
Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
---
stat.c | 117 ++++++++++++++++++++++++++++++++++-----------------------
1 file changed, 70 insertions(+), 47 deletions(-)
diff --git a/stat.c b/stat.c
index fac6d534..03681b9d 100644
--- a/stat.c
+++ b/stat.c
@@ -24,6 +24,14 @@
#define LOG_MSEC_SLACK 1
#endif
+struct log_sample {
+ union io_sample_data data;
+ uint32_t ddir;
+ uint64_t bs;
+ uint64_t offset;
+ uint16_t priority;
+};
+
struct fio_sem *stat_sem;
void clear_rusage_stat(struct thread_data *td)
@@ -3031,17 +3039,15 @@ static struct io_logs *get_cur_log(struct io_log *iolog)
return iolog->pending;
}
-static void __add_log_sample(struct io_log *iolog, union io_sample_data data,
- enum fio_ddir ddir, unsigned long long bs,
- unsigned long t, uint64_t offset,
- unsigned int priority)
+static void __add_log_sample(struct io_log *iolog, unsigned long t,
+ struct log_sample *sample)
{
struct io_logs *cur_log;
if (iolog->disabled)
return;
if (flist_empty(&iolog->io_logs))
- iolog->avg_last[ddir] = t;
+ iolog->avg_last[sample->ddir] = t;
cur_log = get_cur_log(iolog);
if (cur_log) {
@@ -3049,18 +3055,18 @@ static void __add_log_sample(struct io_log *iolog, union io_sample_data data,
s = get_sample(iolog, cur_log, cur_log->nr_samples);
- s->data = data;
+ s->data = sample->data;
s->time = t;
if (iolog->td && iolog->td->o.log_alternate_epoch)
s->time += iolog->td->alternate_epoch;
- io_sample_set_ddir(iolog, s, ddir);
- s->bs = bs;
- s->priority = priority;
+ io_sample_set_ddir(iolog, s, sample->ddir);
+ s->bs = sample->bs;
+ s->priority = sample->priority;
if (iolog->log_offset) {
struct io_sample_offset *so = (void *) s;
- so->offset = offset;
+ so->offset = sample->offset;
}
cur_log->nr_samples++;
@@ -3157,20 +3163,21 @@ static void __add_stat_to_log(struct io_log *iolog, enum fio_ddir ddir,
* had actual samples done.
*/
if (iolog->avg_window[ddir].samples) {
- union io_sample_data data;
+ struct log_sample sample = { {{ 0, 0 }}, ddir, 0, 0, 0 };
+ union io_sample_data *d = &sample.data;
if (log_max == IO_LOG_SAMPLE_AVG) {
- data.val.val0 = iolog->avg_window[ddir].mean.u.f + 0.50;
- data.val.val1 = 0;
+ d->val.val0 = iolog->avg_window[ddir].mean.u.f + 0.50;
+ d->val.val1 = 0;
} else if (log_max == IO_LOG_SAMPLE_MAX) {
- data.val.val0 = iolog->avg_window[ddir].max_val;
- data.val.val1 = 0;
+ d->val.val0 = iolog->avg_window[ddir].max_val;
+ d->val.val1 = 0;
} else {
- data.val.val0 = iolog->avg_window[ddir].mean.u.f + 0.50;
- data.val.val1 = iolog->avg_window[ddir].max_val;
+ d->val.val0 = iolog->avg_window[ddir].mean.u.f + 0.50;
+ d->val.val1 = iolog->avg_window[ddir].max_val;
}
- __add_log_sample(iolog, data, ddir, 0, elapsed, 0, 0);
+ __add_log_sample(iolog, elapsed, &sample);
}
reset_io_stat(&iolog->avg_window[ddir]);
@@ -3187,11 +3194,10 @@ static void _add_stat_to_log(struct io_log *iolog, unsigned long elapsed,
static unsigned long add_log_sample(struct thread_data *td,
struct io_log *iolog,
- union io_sample_data data,
- enum fio_ddir ddir, unsigned long long bs,
- uint64_t offset, unsigned int ioprio)
+ struct log_sample *sample)
{
unsigned long elapsed, this_window;
+ enum fio_ddir ddir = sample->ddir;
if (!ddir_rw(ddir))
return 0;
@@ -3202,8 +3208,7 @@ static unsigned long add_log_sample(struct thread_data *td,
* If no time averaging, just add the log sample.
*/
if (!iolog->avg_msec) {
- __add_log_sample(iolog, data, ddir, bs, elapsed, offset,
- ioprio);
+ __add_log_sample(iolog, elapsed, sample);
return 0;
}
@@ -3211,7 +3216,7 @@ static unsigned long add_log_sample(struct thread_data *td,
* Add the sample. If the time period has passed, then
* add that entry to the log and clear.
*/
- add_stat_sample(&iolog->avg_window[ddir], data.val.val0);
+ add_stat_sample(&iolog->avg_window[ddir], sample->data.val.val0);
/*
* If period hasn't passed, adding the above sample is all we
@@ -3256,12 +3261,13 @@ void add_agg_sample(union io_sample_data data, enum fio_ddir ddir,
unsigned long long bs)
{
struct io_log *iolog;
+ struct log_sample sample = { data, ddir, bs, 0, 0 };
if (!ddir_rw(ddir))
return;
iolog = agg_io_log[ddir];
- __add_log_sample(iolog, data, ddir, bs, mtime_since_genesis(), 0, 0);
+ __add_log_sample(iolog, mtime_since_genesis(), &sample);
}
void add_sync_clat_sample(struct thread_stat *ts, unsigned long long nsec)
@@ -3331,9 +3337,12 @@ void add_clat_sample(struct thread_data *td, enum fio_ddir ddir,
add_stat_prio_sample(ts->clat_prio[ddir], clat_prio_index,
nsec);
- if (td->clat_log)
- add_log_sample(td, td->clat_log, sample_val(nsec), ddir, bs,
- offset, ioprio);
+ if (td->clat_log) {
+ struct log_sample sample = { sample_val(nsec), ddir, bs,
+ offset, ioprio };
+
+ add_log_sample(td, td->clat_log, &sample);
+ }
if (ts->clat_percentiles) {
/*
@@ -3359,6 +3368,8 @@ void add_clat_sample(struct thread_data *td, enum fio_ddir ddir,
if (this_window >= iolog->hist_msec) {
uint64_t *io_u_plat;
struct io_u_plat_entry *dst;
+ struct log_sample sample = { {{ 0, 0 }}, ddir, bs,
+ offset, ioprio };
/*
* Make a byte-for-byte copy of the latency histogram
@@ -3372,8 +3383,9 @@ void add_clat_sample(struct thread_data *td, enum fio_ddir ddir,
memcpy(&(dst->io_u_plat), io_u_plat,
FIO_IO_U_PLAT_NR * sizeof(uint64_t));
flist_add(&dst->list, &hw->list);
- __add_log_sample(iolog, sample_plat(dst), ddir, bs,
- elapsed, offset, ioprio);
+
+ sample.data = sample_plat(dst);
+ __add_log_sample(iolog, elapsed, &sample);
/*
* Update the last time we recorded as being now, minus
@@ -3407,9 +3419,12 @@ void add_slat_sample(struct thread_data *td, struct io_u *io_u)
add_stat_sample(&ts->slat_stat[ddir], nsec);
- if (td->slat_log)
- add_log_sample(td, td->slat_log, sample_val(nsec), ddir,
- io_u->xfer_buflen, io_u->offset, io_u->ioprio);
+ if (td->slat_log) {
+ struct log_sample sample = { sample_val(nsec), ddir,
+ io_u->xfer_buflen, io_u->offset, io_u->ioprio };
+
+ add_log_sample(td, td->slat_log, &sample);
+ }
if (ts->slat_percentiles)
add_lat_percentile_sample(ts, nsec, ddir, FIO_SLAT);
@@ -3433,9 +3448,12 @@ void add_lat_sample(struct thread_data *td, enum fio_ddir ddir,
add_stat_sample(&ts->lat_stat[ddir], nsec);
- if (td->lat_log)
- add_log_sample(td, td->lat_log, sample_val(nsec), ddir, bs,
- io_u->offset, io_u->ioprio);
+ if (td->lat_log) {
+ struct log_sample sample = { sample_val(nsec), ddir, bs,
+ io_u->offset, io_u->ioprio };
+
+ add_log_sample(td, td->lat_log, &sample);
+ }
/*
* When lat_percentiles=1 (default 0), the reported per priority
@@ -3475,9 +3493,12 @@ void add_bw_sample(struct thread_data *td, struct io_u *io_u,
add_stat_sample(&ts->bw_stat[io_u->ddir], rate);
- if (td->bw_log)
- add_log_sample(td, td->bw_log, sample_val(rate), io_u->ddir,
- bytes, io_u->offset, io_u->ioprio);
+ if (td->bw_log) {
+ struct log_sample sample = { sample_val(rate), io_u->ddir,
+ bytes, io_u->offset, io_u->ioprio };
+
+ add_log_sample(td, td->bw_log, &sample);
+ }
td->stat_io_bytes[io_u->ddir] = td->this_io_bytes[io_u->ddir];
@@ -3526,13 +3547,12 @@ static int __add_samples(struct thread_data *td, struct timespec *parent_tv,
add_stat_sample(&stat[ddir], rate);
if (log) {
- unsigned long long bs = 0;
+ struct log_sample sample = {
+ sample_val(rate), ddir, 0, 0, 0 };
if (td->o.min_bs[ddir] == td->o.max_bs[ddir])
- bs = td->o.min_bs[ddir];
-
- next = add_log_sample(td, log, sample_val(rate), ddir,
- bs, 0, 0);
+ sample.bs = td->o.min_bs[ddir];
+ next = add_log_sample(td, log, &sample);
next_log = min(next_log, next);
}
@@ -3570,9 +3590,12 @@ void add_iops_sample(struct thread_data *td, struct io_u *io_u,
add_stat_sample(&ts->iops_stat[io_u->ddir], 1);
- if (td->iops_log)
- add_log_sample(td, td->iops_log, sample_val(1), io_u->ddir,
- bytes, io_u->offset, io_u->ioprio);
+ if (td->iops_log) {
+ struct log_sample sample = { sample_val(1), io_u->ddir, bytes,
+ io_u->offset, io_u->ioprio };
+
+ add_log_sample(td, td->iops_log, &sample);
+ }
td->stat_io_blocks[io_u->ddir] = td->this_io_blocks[io_u->ddir];
--
2.45.2
^ permalink raw reply related [flat|nested] 25+ messages in thread* [PATCH 3/9] iolog: refactor flush_samples()
2024-08-27 7:08 [PATCH 0/9] introduce the log_issue_time option Shin'ichiro Kawasaki
2024-08-27 7:08 ` [PATCH 1/9] stat: reduce arguments of add_*lat_sample() functions Shin'ichiro Kawasaki
2024-08-27 7:08 ` [PATCH 2/9] stat: reduce arguments of add_log_sample() Shin'ichiro Kawasaki
@ 2024-08-27 7:08 ` Shin'ichiro Kawasaki
2024-08-28 16:56 ` Nick Neumann
2024-08-27 7:08 ` [PATCH 4/9] iolog: drop struct io_sample_offset Shin'ichiro Kawasaki
` (6 subsequent siblings)
9 siblings, 1 reply; 25+ messages in thread
From: Shin'ichiro Kawasaki @ 2024-08-27 7:08 UTC (permalink / raw)
To: fio, Jens Axboe, Vincent Fu; +Cc: Damien Le Moal, Shin'ichiro Kawasaki
flush_samples() controls the log file format depending on the options
log_avg_msec, log_window_value, log_offset and log_prio. It has deeply
nested branches to check the options. These nested branches make it
difficult to add more fields to the log file format. For ease of the log
file format improvements, refactor the function. Instead of checking all
conditions at once, check each condition one by one, generate small
strings for each field and add them to the final string to output. For
this purpose, introduce the new function print_sample_fields() which
generates each field string.
Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
---
iolog.c | 131 +++++++++++++++++++++++++++++---------------------------
1 file changed, 67 insertions(+), 64 deletions(-)
diff --git a/iolog.c b/iolog.c
index f727c97f..06b51ea0 100644
--- a/iolog.c
+++ b/iolog.c
@@ -1009,13 +1009,42 @@ static void flush_hist_samples(FILE *f, int hist_coarseness, void *samples,
}
}
+static int print_sample_fields(char **p, size_t *left, const char *fmt, ...) {
+ va_list ap;
+ int ret;
+
+ va_start(ap, fmt);
+ ret = vsnprintf(*p, *left, fmt, ap);
+ if (ret < 0 || ret >= *left) {
+ log_err("sample file write failed: %d\n", ret);
+ return -1;
+ }
+ va_end(ap);
+
+ *p += ret;
+ *left -= ret;
+
+ return 0;
+}
+
+/*
+ * flush_samples - Generate output for log samples
+ * Each sample output is built using a temporary buffer. This buffer size
+ * assumptions are:
+ * - Each sample has less than 10 fields
+ * - Each sample field fits in 25 characters (20 digits for 64 bit number
+ * and a few bytes delimiter)
+ */
void flush_samples(FILE *f, void *samples, uint64_t sample_size)
{
struct io_sample *s;
- int log_offset, log_prio, log_avg_max;
+ struct io_sample_offset *sa;
+ bool log_offset, log_prio, log_avg_max;
uint64_t i, nr_samples;
- unsigned int prio_val;
- const char *fmt;
+ char buf[256];
+ char *p;
+ size_t left;
+ int ret;
if (!sample_size)
return;
@@ -1025,75 +1054,49 @@ void flush_samples(FILE *f, void *samples, uint64_t sample_size)
log_prio = (s->__ddir & LOG_PRIO_SAMPLE_BIT) != 0;
log_avg_max = (s->__ddir & LOG_AVG_MAX_SAMPLE_BIT) != 0;
- if (log_offset) {
- if (log_prio) {
- if (log_avg_max)
- fmt = "%" PRIu64 ", %" PRId64 ", %" PRId64 ", %u, %llu, %llu, 0x%04x\n";
- else
- fmt = "%" PRIu64 ", %" PRId64 ", %u, %llu, %llu, 0x%04x\n";
- } else {
- if (log_avg_max)
- fmt = "%" PRIu64 ", %" PRId64 ", %" PRId64 ", %u, %llu, %llu, %u\n";
- else
- fmt = "%" PRIu64 ", %" PRId64 ", %u, %llu, %llu, %u\n";
- }
- } else {
- if (log_prio) {
- if (log_avg_max)
- fmt = "%" PRIu64 ", %" PRId64 ", %" PRId64 ", %u, %llu, 0x%04x\n";
- else
- fmt = "%" PRIu64 ", %" PRId64 ", %u, %llu, 0x%04x\n";
- } else {
- if (log_avg_max)
- fmt = "%" PRIu64 ", %" PRId64 ", %" PRId64 ", %u, %llu, %u\n";
- else
- fmt = "%" PRIu64 ", %" PRId64 ", %u, %llu, %u\n";
- }
- }
-
nr_samples = sample_size / __log_entry_sz(log_offset);
for (i = 0; i < nr_samples; i++) {
s = __get_sample(samples, log_offset, i);
+ sa = (void *) s;
+ p = buf;
+ left = sizeof(buf);
+
+ ret = print_sample_fields(&p, &left, "%" PRIu64 ", %" PRId64,
+ s->time, s->data.val.val0);
+ if (ret)
+ return;
+
+ if (log_avg_max) {
+ ret = print_sample_fields(&p, &left, ", %" PRId64,
+ s->data.val.val1);
+ if (ret)
+ return;
+ }
+
+ ret = print_sample_fields(&p, &left, ", %u, %llu",
+ io_sample_ddir(s),
+ (unsigned long long) s->bs);
+ if (ret)
+ return;
+
+ if (log_offset) {
+ ret = print_sample_fields(&p, &left, ", %llu",
+ (unsigned long long) sa->offset);
+ if (ret)
+ return;
+ }
if (log_prio)
- prio_val = s->priority;
+ ret = print_sample_fields(&p, &left, ", 0x%04x",
+ s->priority);
else
- prio_val = ioprio_value_is_class_rt(s->priority);
+ ret = print_sample_fields(&p, &left, ", %u",
+ ioprio_value_is_class_rt(s->priority));
+ if (ret)
+ return;
- if (!log_offset) {
- if (log_avg_max)
- fprintf(f, fmt,
- s->time,
- s->data.val.val0,
- s->data.val.val1,
- io_sample_ddir(s), (unsigned long long) s->bs,
- prio_val);
- else
- fprintf(f, fmt,
- s->time,
- s->data.val.val0,
- io_sample_ddir(s), (unsigned long long) s->bs,
- prio_val);
- } else {
- struct io_sample_offset *so = (void *) s;
-
- if (log_avg_max)
- fprintf(f, fmt,
- s->time,
- s->data.val.val0,
- s->data.val.val1,
- io_sample_ddir(s), (unsigned long long) s->bs,
- (unsigned long long) so->offset,
- prio_val);
- else
- fprintf(f, fmt,
- s->time,
- s->data.val.val0,
- io_sample_ddir(s), (unsigned long long) s->bs,
- (unsigned long long) so->offset,
- prio_val);
- }
+ fprintf(f, "%s\n", buf);
}
}
--
2.45.2
^ permalink raw reply related [flat|nested] 25+ messages in thread* Re: [PATCH 3/9] iolog: refactor flush_samples()
2024-08-27 7:08 ` [PATCH 3/9] iolog: refactor flush_samples() Shin'ichiro Kawasaki
@ 2024-08-28 16:56 ` Nick Neumann
2024-08-29 5:44 ` Shinichiro Kawasaki
0 siblings, 1 reply; 25+ messages in thread
From: Nick Neumann @ 2024-08-28 16:56 UTC (permalink / raw)
To: Shin'ichiro Kawasaki; +Cc: fio, Jens Axboe, Vincent Fu, Damien Le Moal
One quick observation though I'm guessing you probably have already
checked it. It would be good to be sure the performance after this
refactor for outputting the log files is similar or bettter, due to
how long it can take to output per-I/O log files.
Thanks,
Nick
On Tue, Aug 27, 2024 at 2:09 AM Shin'ichiro Kawasaki
<shinichiro.kawasaki@wdc.com> wrote:
>
> flush_samples() controls the log file format depending on the options
> log_avg_msec, log_window_value, log_offset and log_prio. It has deeply
> nested branches to check the options. These nested branches make it
> difficult to add more fields to the log file format. For ease of the log
> file format improvements, refactor the function. Instead of checking all
> conditions at once, check each condition one by one, generate small
> strings for each field and add them to the final string to output. For
> this purpose, introduce the new function print_sample_fields() which
> generates each field string.
>
> Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
> ---
> iolog.c | 131 +++++++++++++++++++++++++++++---------------------------
> 1 file changed, 67 insertions(+), 64 deletions(-)
>
> diff --git a/iolog.c b/iolog.c
> index f727c97f..06b51ea0 100644
> --- a/iolog.c
> +++ b/iolog.c
> @@ -1009,13 +1009,42 @@ static void flush_hist_samples(FILE *f, int hist_coarseness, void *samples,
> }
> }
>
> +static int print_sample_fields(char **p, size_t *left, const char *fmt, ...) {
> + va_list ap;
> + int ret;
> +
> + va_start(ap, fmt);
> + ret = vsnprintf(*p, *left, fmt, ap);
> + if (ret < 0 || ret >= *left) {
> + log_err("sample file write failed: %d\n", ret);
> + return -1;
> + }
> + va_end(ap);
> +
> + *p += ret;
> + *left -= ret;
> +
> + return 0;
> +}
> +
> +/*
> + * flush_samples - Generate output for log samples
> + * Each sample output is built using a temporary buffer. This buffer size
> + * assumptions are:
> + * - Each sample has less than 10 fields
> + * - Each sample field fits in 25 characters (20 digits for 64 bit number
> + * and a few bytes delimiter)
> + */
> void flush_samples(FILE *f, void *samples, uint64_t sample_size)
> {
> struct io_sample *s;
> - int log_offset, log_prio, log_avg_max;
> + struct io_sample_offset *sa;
> + bool log_offset, log_prio, log_avg_max;
> uint64_t i, nr_samples;
> - unsigned int prio_val;
> - const char *fmt;
> + char buf[256];
> + char *p;
> + size_t left;
> + int ret;
>
> if (!sample_size)
> return;
> @@ -1025,75 +1054,49 @@ void flush_samples(FILE *f, void *samples, uint64_t sample_size)
> log_prio = (s->__ddir & LOG_PRIO_SAMPLE_BIT) != 0;
> log_avg_max = (s->__ddir & LOG_AVG_MAX_SAMPLE_BIT) != 0;
>
> - if (log_offset) {
> - if (log_prio) {
> - if (log_avg_max)
> - fmt = "%" PRIu64 ", %" PRId64 ", %" PRId64 ", %u, %llu, %llu, 0x%04x\n";
> - else
> - fmt = "%" PRIu64 ", %" PRId64 ", %u, %llu, %llu, 0x%04x\n";
> - } else {
> - if (log_avg_max)
> - fmt = "%" PRIu64 ", %" PRId64 ", %" PRId64 ", %u, %llu, %llu, %u\n";
> - else
> - fmt = "%" PRIu64 ", %" PRId64 ", %u, %llu, %llu, %u\n";
> - }
> - } else {
> - if (log_prio) {
> - if (log_avg_max)
> - fmt = "%" PRIu64 ", %" PRId64 ", %" PRId64 ", %u, %llu, 0x%04x\n";
> - else
> - fmt = "%" PRIu64 ", %" PRId64 ", %u, %llu, 0x%04x\n";
> - } else {
> - if (log_avg_max)
> - fmt = "%" PRIu64 ", %" PRId64 ", %" PRId64 ", %u, %llu, %u\n";
> - else
> - fmt = "%" PRIu64 ", %" PRId64 ", %u, %llu, %u\n";
> - }
> - }
> -
> nr_samples = sample_size / __log_entry_sz(log_offset);
>
> for (i = 0; i < nr_samples; i++) {
> s = __get_sample(samples, log_offset, i);
> + sa = (void *) s;
> + p = buf;
> + left = sizeof(buf);
> +
> + ret = print_sample_fields(&p, &left, "%" PRIu64 ", %" PRId64,
> + s->time, s->data.val.val0);
> + if (ret)
> + return;
> +
> + if (log_avg_max) {
> + ret = print_sample_fields(&p, &left, ", %" PRId64,
> + s->data.val.val1);
> + if (ret)
> + return;
> + }
> +
> + ret = print_sample_fields(&p, &left, ", %u, %llu",
> + io_sample_ddir(s),
> + (unsigned long long) s->bs);
> + if (ret)
> + return;
> +
> + if (log_offset) {
> + ret = print_sample_fields(&p, &left, ", %llu",
> + (unsigned long long) sa->offset);
> + if (ret)
> + return;
> + }
>
> if (log_prio)
> - prio_val = s->priority;
> + ret = print_sample_fields(&p, &left, ", 0x%04x",
> + s->priority);
> else
> - prio_val = ioprio_value_is_class_rt(s->priority);
> + ret = print_sample_fields(&p, &left, ", %u",
> + ioprio_value_is_class_rt(s->priority));
> + if (ret)
> + return;
>
> - if (!log_offset) {
> - if (log_avg_max)
> - fprintf(f, fmt,
> - s->time,
> - s->data.val.val0,
> - s->data.val.val1,
> - io_sample_ddir(s), (unsigned long long) s->bs,
> - prio_val);
> - else
> - fprintf(f, fmt,
> - s->time,
> - s->data.val.val0,
> - io_sample_ddir(s), (unsigned long long) s->bs,
> - prio_val);
> - } else {
> - struct io_sample_offset *so = (void *) s;
> -
> - if (log_avg_max)
> - fprintf(f, fmt,
> - s->time,
> - s->data.val.val0,
> - s->data.val.val1,
> - io_sample_ddir(s), (unsigned long long) s->bs,
> - (unsigned long long) so->offset,
> - prio_val);
> - else
> - fprintf(f, fmt,
> - s->time,
> - s->data.val.val0,
> - io_sample_ddir(s), (unsigned long long) s->bs,
> - (unsigned long long) so->offset,
> - prio_val);
> - }
> + fprintf(f, "%s\n", buf);
> }
> }
>
> --
> 2.45.2
>
>
^ permalink raw reply [flat|nested] 25+ messages in thread* Re: [PATCH 3/9] iolog: refactor flush_samples()
2024-08-28 16:56 ` Nick Neumann
@ 2024-08-29 5:44 ` Shinichiro Kawasaki
0 siblings, 0 replies; 25+ messages in thread
From: Shinichiro Kawasaki @ 2024-08-29 5:44 UTC (permalink / raw)
To: Nick Neumann; +Cc: fio@vger.kernel.org, Jens Axboe, Vincent Fu, Damien Le Moal
On Aug 28, 2024 / 11:56, Nick Neumann wrote:
> One quick observation though I'm guessing you probably have already
> checked it. It would be good to be sure the performance after this
> refactor for outputting the log files is similar or bettter, due to
> how long it can take to output per-I/O log files.
Good question. To understand the performance, I used gperf and compared
runtime of flush_samples() with and without the series.
I built the fio with these commands,
$ LDFLAGS=-pg ./configure --extra-cflags="-pg -O0"
$ make -j$(nproc)
Then ran fio and gperf as follows:
$ ./fio --name=w --filename=/dev/zero --rw=read --bs=4k --size=128M --time_based --runtime=1m --write_lat_log=lat --log_offset=1 --thread=1
$ gprof -p ./fio ./gmon.out | grep flush_samples
4.96 3.73 0.63 59 0.01 0.02 flush_samples
The second number "3.73 "is the cumulative runtime of flush_samples(). It was
3.73 seconds with the series, and 3.50 seconds without the series. This
indicates that around 1% overhead is added to the I/O log file output. Fio users
will need to wait a little bit longer to get the I/O log files after the
fio job completion. I hope this overhead is acceptable.
^ permalink raw reply [flat|nested] 25+ messages in thread
* [PATCH 4/9] iolog: drop struct io_sample_offset
2024-08-27 7:08 [PATCH 0/9] introduce the log_issue_time option Shin'ichiro Kawasaki
` (2 preceding siblings ...)
2024-08-27 7:08 ` [PATCH 3/9] iolog: refactor flush_samples() Shin'ichiro Kawasaki
@ 2024-08-27 7:08 ` Shin'ichiro Kawasaki
2024-08-27 7:08 ` [PATCH 5/9] introduce the log_issue_time option Shin'ichiro Kawasaki
` (5 subsequent siblings)
9 siblings, 0 replies; 25+ messages in thread
From: Shin'ichiro Kawasaki @ 2024-08-27 7:08 UTC (permalink / raw)
To: fio, Jens Axboe, Vincent Fu; +Cc: Damien Le Moal, Shin'ichiro Kawasaki
Fio uses the struct io_sample to log attributes of each IO. When the
log_offset option is set, fio uses the struct io_sample_offset instead
which has the additional field dedicated to the offset data. Fio chooses
one of these two structs by the log_offset option to minimize memory
usage for IO sampling. However, the dedicated struct io_sample_offset is
not flexible and makes it difficult to add new sampling items.
To allow adding new sampling items, drop the struct io_sample_offset.
Instead, introduce the variable length array "uint64_t aux[]" at the
end of the struct io_sample which holds any auxiliary sampling data.
At this moment, it holds only one item "offset" as the first array
element. The following patch will add a new item.
Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
---
client.c | 8 +++-----
iolog.c | 4 +---
iolog.h | 19 ++++++++++++-------
server.c | 8 +++-----
stat.c | 7 ++-----
5 files changed, 21 insertions(+), 25 deletions(-)
diff --git a/client.c b/client.c
index 4cb7dffe..0b6044dd 100644
--- a/client.c
+++ b/client.c
@@ -1726,11 +1726,9 @@ static struct cmd_iolog_pdu *convert_iolog(struct fio_net_cmd *cmd,
s->bs = le64_to_cpu(s->bs);
s->priority = le16_to_cpu(s->priority);
- if (ret->log_offset) {
- struct io_sample_offset *so = (void *) s;
-
- so->offset = le64_to_cpu(so->offset);
- }
+ if (ret->log_offset)
+ s->aux[IOS_AUX_OFFSET_INDEX] =
+ le64_to_cpu(s->aux[IOS_AUX_OFFSET_INDEX]);
if (ret->log_type == IO_LOG_TYPE_HIST) {
s->data.plat_entry = (struct io_u_plat_entry *)(((char *)s) + sizeof(*s));
diff --git a/iolog.c b/iolog.c
index 06b51ea0..bb1ade86 100644
--- a/iolog.c
+++ b/iolog.c
@@ -1038,7 +1038,6 @@ static int print_sample_fields(char **p, size_t *left, const char *fmt, ...) {
void flush_samples(FILE *f, void *samples, uint64_t sample_size)
{
struct io_sample *s;
- struct io_sample_offset *sa;
bool log_offset, log_prio, log_avg_max;
uint64_t i, nr_samples;
char buf[256];
@@ -1058,7 +1057,6 @@ void flush_samples(FILE *f, void *samples, uint64_t sample_size)
for (i = 0; i < nr_samples; i++) {
s = __get_sample(samples, log_offset, i);
- sa = (void *) s;
p = buf;
left = sizeof(buf);
@@ -1082,7 +1080,7 @@ void flush_samples(FILE *f, void *samples, uint64_t sample_size)
if (log_offset) {
ret = print_sample_fields(&p, &left, ", %llu",
- (unsigned long long) sa->offset);
+ (unsigned long long) s->aux[IOS_AUX_OFFSET_INDEX]);
if (ret)
return;
}
diff --git a/iolog.h b/iolog.h
index 26dd5cca..d868921b 100644
--- a/iolog.h
+++ b/iolog.h
@@ -54,11 +54,14 @@ struct io_sample {
uint32_t __ddir;
uint16_t priority;
uint64_t bs;
+ uint64_t aux[];
};
-struct io_sample_offset {
- struct io_sample s;
- uint64_t offset;
+/*
+ * Enumerate indexes of auxiliary log data in struct io_sample aux[] array
+ */
+enum {
+ IOS_AUX_OFFSET_INDEX,
};
enum {
@@ -180,12 +183,14 @@ static inline void io_sample_set_ddir(struct io_log *log,
io->__ddir = ddir | log->log_ddir_mask;
}
-static inline size_t __log_entry_sz(int log_offset)
+static inline size_t __log_entry_sz(bool log_offset)
{
+ size_t ret = sizeof(struct io_sample);
+
if (log_offset)
- return sizeof(struct io_sample_offset);
- else
- return sizeof(struct io_sample);
+ ret += sizeof(uint64_t);
+
+ return ret;
}
static inline size_t log_entry_sz(struct io_log *log)
diff --git a/server.c b/server.c
index afaeb348..dbb9eb5e 100644
--- a/server.c
+++ b/server.c
@@ -2295,11 +2295,9 @@ int fio_send_iolog(struct thread_data *td, struct io_log *log, const char *name)
s->__ddir = __cpu_to_le32(s->__ddir);
s->bs = cpu_to_le64(s->bs);
- if (log->log_offset) {
- struct io_sample_offset *so = (void *) s;
-
- so->offset = cpu_to_le64(so->offset);
- }
+ if (log->log_offset)
+ s->aux[IOS_AUX_OFFSET_INDEX] =
+ cpu_to_le64(s->aux[IOS_AUX_OFFSET_INDEX]);
}
}
diff --git a/stat.c b/stat.c
index 03681b9d..56d04e7a 100644
--- a/stat.c
+++ b/stat.c
@@ -3063,11 +3063,8 @@ static void __add_log_sample(struct io_log *iolog, unsigned long t,
s->bs = sample->bs;
s->priority = sample->priority;
- if (iolog->log_offset) {
- struct io_sample_offset *so = (void *) s;
-
- so->offset = sample->offset;
- }
+ if (iolog->log_offset)
+ s->aux[IOS_AUX_OFFSET_INDEX] = sample->offset;
cur_log->nr_samples++;
return;
--
2.45.2
^ permalink raw reply related [flat|nested] 25+ messages in thread* [PATCH 5/9] introduce the log_issue_time option
2024-08-27 7:08 [PATCH 0/9] introduce the log_issue_time option Shin'ichiro Kawasaki
` (3 preceding siblings ...)
2024-08-27 7:08 ` [PATCH 4/9] iolog: drop struct io_sample_offset Shin'ichiro Kawasaki
@ 2024-08-27 7:08 ` Shin'ichiro Kawasaki
2024-08-28 17:02 ` Jens Axboe
` (2 more replies)
2024-08-27 7:08 ` [PATCH 6/9] doc: fix the descriptions of the log_prio option Shin'ichiro Kawasaki
` (4 subsequent siblings)
9 siblings, 3 replies; 25+ messages in thread
From: Shin'ichiro Kawasaki @ 2024-08-27 7:08 UTC (permalink / raw)
To: fio, Jens Axboe, Vincent Fu; +Cc: Damien Le Moal, Shin'ichiro Kawasaki
When write_lat_log option is set, fio outputs the 'clat' completion
latency log file. This clat log can be used to analyze IO latency. This
file can also be considered as an IO trace file as each IO entry can
fully describe an IO when the --log_offset and --log_prio options are
also used.
However, using the clat log file as an IO trace is inaccuate due to two
reasons. Firstly, the time field of each entry uses millisecond units,
which is too coarse for fast IOs (e.g. when using SSDs). Secondly, the
time field value is recorded not at command completion, but at log
sample recording. The time field value is slightly different from the IO
completion time. It can be used only as an approximated completion time.
To analyze IO issue time and IO completion time accurately using the
clat log, introduce the new option 'log_issue_time'. When this option is
set, add another field to the log file entries and put the IO issue time
in nanosecond to the field. The IO completion time can be calculated by
adding the completion latency to the IO issue time.
The IO issue time field is added to 'slat' submit latency log file also.
This helps to calculate IO start time by subtracting the submission
latency from the IO issue time.
The log_issue_time option can be used for IO trace when the
write_lat_log option and the log_offset options are set together. When
the log_issue_time option is set but the write_lat_log option or the
log_offset option is not set, fio errors out. When the log_issue_time
option and the write_lat_log option are set together with other
write_X_log options, the IO issue time field is added to all log files.
As for the other log files than clat and slat log, the IO issue time
does not have meaning then '0' is set to the field. When log_avg_msec
option is set, average of the log values of the specified duration is
logged. The IO issue time does not have meaning in this case either and
'0' is set to the field.
Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
---
cconv.c | 2 ++
client.c | 30 ++++++++++++++++++++----------
init.c | 12 ++++++++++++
iolog.c | 29 +++++++++++++++++++++--------
iolog.h | 30 ++++++++++++++++++++++++------
options.c | 10 ++++++++++
server.c | 4 ++++
server.h | 1 +
stat.c | 24 +++++++++++++++---------
thread_options.h | 2 ++
10 files changed, 111 insertions(+), 33 deletions(-)
diff --git a/cconv.c b/cconv.c
index 1d494982..9571f1a8 100644
--- a/cconv.c
+++ b/cconv.c
@@ -216,6 +216,7 @@ int convert_thread_options_to_cpu(struct thread_options *o,
o->log_max = le32_to_cpu(top->log_max);
o->log_offset = le32_to_cpu(top->log_offset);
o->log_prio = le32_to_cpu(top->log_prio);
+ o->log_issue_time = le32_to_cpu(top->log_issue_time);
o->log_gz = le32_to_cpu(top->log_gz);
o->log_gz_store = le32_to_cpu(top->log_gz_store);
o->log_alternate_epoch = le32_to_cpu(top->log_alternate_epoch);
@@ -458,6 +459,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
top->log_max = cpu_to_le32(o->log_max);
top->log_offset = cpu_to_le32(o->log_offset);
top->log_prio = cpu_to_le32(o->log_prio);
+ top->log_issue_time = cpu_to_le32(o->log_issue_time);
top->log_gz = cpu_to_le32(o->log_gz);
top->log_gz_store = cpu_to_le32(o->log_gz_store);
top->log_alternate_epoch = cpu_to_le32(o->log_alternate_epoch);
diff --git a/client.c b/client.c
index 0b6044dd..5964faba 100644
--- a/client.c
+++ b/client.c
@@ -1388,8 +1388,8 @@ static void handle_eta(struct fio_client *client, struct fio_net_cmd *cmd)
static void client_flush_hist_samples(FILE *f, int hist_coarseness, void *samples,
uint64_t sample_size)
{
- struct io_sample *s;
- int log_offset;
+ struct io_sample *s, *s_tmp;
+ bool log_offset, log_issue_time;
uint64_t i, j, nr_samples;
struct io_u_plat_entry *entry;
uint64_t *io_u_plat;
@@ -1399,15 +1399,17 @@ static void client_flush_hist_samples(FILE *f, int hist_coarseness, void *sample
if (!sample_size)
return;
- s = __get_sample(samples, 0, 0);
+ s = __get_sample(samples, 0, 0, 0);
log_offset = (s->__ddir & LOG_OFFSET_SAMPLE_BIT) != 0;
+ log_issue_time = (s->__ddir & LOG_ISSUE_TIME_SAMPLE_BIT) != 0;
- nr_samples = sample_size / __log_entry_sz(log_offset);
+ nr_samples = sample_size / __log_entry_sz(log_offset, log_issue_time);
for (i = 0; i < nr_samples; i++) {
- s = (struct io_sample *)((char *)__get_sample(samples, log_offset, i) +
- i * sizeof(struct io_u_plat_entry));
+ s_tmp = __get_sample(samples, log_offset, log_issue_time, i);
+ s = (struct io_sample *)((char *)s_tmp +
+ i * sizeof(struct io_u_plat_entry));
entry = s->data.plat_entry;
io_u_plat = entry->io_u_plat;
@@ -1595,6 +1597,7 @@ static struct cmd_iolog_pdu *convert_iolog_gz(struct fio_net_cmd *cmd,
uint64_t nr_samples;
size_t total;
char *p;
+ size_t log_entry_size;
stream.zalloc = Z_NULL;
stream.zfree = Z_NULL;
@@ -1610,11 +1613,13 @@ static struct cmd_iolog_pdu *convert_iolog_gz(struct fio_net_cmd *cmd,
*/
nr_samples = le64_to_cpu(pdu->nr_samples);
+ log_entry_size = __log_entry_sz(le32_to_cpu(pdu->log_offset),
+ le32_to_cpu(pdu->log_issue_time));
if (pdu->log_type == IO_LOG_TYPE_HIST)
- total = nr_samples * (__log_entry_sz(le32_to_cpu(pdu->log_offset)) +
- sizeof(struct io_u_plat_entry));
+ total = nr_samples * (log_entry_size +
+ sizeof(struct io_u_plat_entry));
else
- total = nr_samples * __log_entry_sz(le32_to_cpu(pdu->log_offset));
+ total = nr_samples * log_entry_size;
ret = malloc(total + sizeof(*pdu));
ret->nr_samples = nr_samples;
@@ -1703,6 +1708,7 @@ static struct cmd_iolog_pdu *convert_iolog(struct fio_net_cmd *cmd,
ret->compressed = le32_to_cpu(ret->compressed);
ret->log_offset = le32_to_cpu(ret->log_offset);
ret->log_prio = le32_to_cpu(ret->log_prio);
+ ret->log_issue_time = le32_to_cpu(ret->log_issue_time);
ret->log_hist_coarseness = le32_to_cpu(ret->log_hist_coarseness);
ret->per_job_logs = le32_to_cpu(ret->per_job_logs);
@@ -1713,7 +1719,7 @@ static struct cmd_iolog_pdu *convert_iolog(struct fio_net_cmd *cmd,
for (i = 0; i < ret->nr_samples; i++) {
struct io_sample *s;
- s = __get_sample(samples, ret->log_offset, i);
+ s = __get_sample(samples, ret->log_offset, ret->log_issue_time, i);
if (ret->log_type == IO_LOG_TYPE_HIST)
s = (struct io_sample *)((char *)s + sizeof(struct io_u_plat_entry) * i);
@@ -1730,6 +1736,10 @@ static struct cmd_iolog_pdu *convert_iolog(struct fio_net_cmd *cmd,
s->aux[IOS_AUX_OFFSET_INDEX] =
le64_to_cpu(s->aux[IOS_AUX_OFFSET_INDEX]);
+ if (ret->log_issue_time)
+ s->aux[IOS_AUX_ISSUE_TIME_INDEX] =
+ le64_to_cpu(s->aux[IOS_AUX_ISSUE_TIME_INDEX]);
+
if (ret->log_type == IO_LOG_TYPE_HIST) {
s->data.plat_entry = (struct io_u_plat_entry *)(((char *)s) + sizeof(*s));
s->data.plat_entry->list.next = NULL;
diff --git a/init.c b/init.c
index ff3e9a90..3b213c0e 100644
--- a/init.c
+++ b/init.c
@@ -1621,12 +1621,18 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
.log_type = IO_LOG_TYPE_LAT,
.log_offset = o->log_offset,
.log_prio = o->log_prio,
+ .log_issue_time = o->log_issue_time,
.log_gz = o->log_gz,
.log_gz_store = o->log_gz_store,
};
const char *pre = make_log_name(o->lat_log_file, o->name);
const char *suf;
+ if (o->log_issue_time && !o->log_offset) {
+ log_err("fio: log_issue_time option requires log_write_lat_log and log_offset options\n");
+ goto err;
+ }
+
if (p.log_gz_store)
suf = "log.fz";
else
@@ -1650,6 +1656,9 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
setup_log(&td->clat_log, &p, logname);
}
+ } else if (o->log_issue_time) {
+ log_err("fio: log_issue_time option requires log_write_lat_log and log_offset options\n");
+ goto err;
}
if (o->write_hist_log) {
@@ -1661,6 +1670,7 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
.log_type = IO_LOG_TYPE_HIST,
.log_offset = o->log_offset,
.log_prio = o->log_prio,
+ .log_issue_time = o->log_issue_time,
.log_gz = o->log_gz,
.log_gz_store = o->log_gz_store,
};
@@ -1693,6 +1703,7 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
.log_type = IO_LOG_TYPE_BW,
.log_offset = o->log_offset,
.log_prio = o->log_prio,
+ .log_issue_time = o->log_issue_time,
.log_gz = o->log_gz,
.log_gz_store = o->log_gz_store,
};
@@ -1725,6 +1736,7 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
.log_type = IO_LOG_TYPE_IOPS,
.log_offset = o->log_offset,
.log_prio = o->log_prio,
+ .log_issue_time = o->log_issue_time,
.log_gz = o->log_gz,
.log_gz_store = o->log_gz_store,
};
diff --git a/iolog.c b/iolog.c
index bb1ade86..aadb0d05 100644
--- a/iolog.c
+++ b/iolog.c
@@ -845,6 +845,7 @@ void setup_log(struct io_log **log, struct log_params *p,
l->log_type = p->log_type;
l->log_offset = p->log_offset;
l->log_prio = p->log_prio;
+ l->log_issue_time = p->log_issue_time;
l->log_gz = p->log_gz;
l->log_gz_store = p->log_gz_store;
l->avg_msec = p->avg_msec;
@@ -887,6 +888,9 @@ void setup_log(struct io_log **log, struct log_params *p,
if (l->td && l->td->o.log_max == IO_LOG_SAMPLE_BOTH)
l->log_ddir_mask |= LOG_AVG_MAX_SAMPLE_BIT;
+ if (l->log_issue_time)
+ l->log_ddir_mask |= LOG_ISSUE_TIME_SAMPLE_BIT;
+
INIT_FLIST_HEAD(&l->chunk_list);
if (l->log_gz && !p->td)
@@ -969,7 +973,7 @@ static void flush_hist_samples(FILE *f, int hist_coarseness, void *samples,
uint64_t sample_size)
{
struct io_sample *s;
- int log_offset;
+ bool log_offset, log_issue_time;
uint64_t i, j, nr_samples;
struct io_u_plat_entry *entry, *entry_before;
uint64_t *io_u_plat;
@@ -980,13 +984,14 @@ static void flush_hist_samples(FILE *f, int hist_coarseness, void *samples,
if (!sample_size)
return;
- s = __get_sample(samples, 0, 0);
+ s = __get_sample(samples, 0, 0, 0);
log_offset = (s->__ddir & LOG_OFFSET_SAMPLE_BIT) != 0;
+ log_issue_time = (s->__ddir & LOG_ISSUE_TIME_SAMPLE_BIT) != 0;
- nr_samples = sample_size / __log_entry_sz(log_offset);
+ nr_samples = sample_size / __log_entry_sz(log_offset, log_issue_time);
for (i = 0; i < nr_samples; i++) {
- s = __get_sample(samples, log_offset, i);
+ s = __get_sample(samples, log_offset, log_issue_time, i);
entry = s->data.plat_entry;
io_u_plat = entry->io_u_plat;
@@ -1038,7 +1043,7 @@ static int print_sample_fields(char **p, size_t *left, const char *fmt, ...) {
void flush_samples(FILE *f, void *samples, uint64_t sample_size)
{
struct io_sample *s;
- bool log_offset, log_prio, log_avg_max;
+ bool log_offset, log_prio, log_avg_max, log_issue_time;
uint64_t i, nr_samples;
char buf[256];
char *p;
@@ -1048,15 +1053,16 @@ void flush_samples(FILE *f, void *samples, uint64_t sample_size)
if (!sample_size)
return;
- s = __get_sample(samples, 0, 0);
+ s = __get_sample(samples, 0, 0, 0);
log_offset = (s->__ddir & LOG_OFFSET_SAMPLE_BIT) != 0;
log_prio = (s->__ddir & LOG_PRIO_SAMPLE_BIT) != 0;
log_avg_max = (s->__ddir & LOG_AVG_MAX_SAMPLE_BIT) != 0;
+ log_issue_time = (s->__ddir & LOG_ISSUE_TIME_SAMPLE_BIT) != 0;
- nr_samples = sample_size / __log_entry_sz(log_offset);
+ nr_samples = sample_size / __log_entry_sz(log_offset, log_issue_time);
for (i = 0; i < nr_samples; i++) {
- s = __get_sample(samples, log_offset, i);
+ s = __get_sample(samples, log_offset, log_issue_time, i);
p = buf;
left = sizeof(buf);
@@ -1094,6 +1100,13 @@ void flush_samples(FILE *f, void *samples, uint64_t sample_size)
if (ret)
return;
+ if (log_issue_time) {
+ ret = print_sample_fields(&p, &left, ", %llu",
+ (unsigned long long) s->aux[IOS_AUX_ISSUE_TIME_INDEX]);
+ if (ret)
+ return;
+ }
+
fprintf(f, "%s\n", buf);
}
}
diff --git a/iolog.h b/iolog.h
index d868921b..e864d169 100644
--- a/iolog.h
+++ b/iolog.h
@@ -62,6 +62,7 @@ struct io_sample {
*/
enum {
IOS_AUX_OFFSET_INDEX,
+ IOS_AUX_ISSUE_TIME_INDEX,
};
enum {
@@ -122,6 +123,11 @@ struct io_log {
*/
unsigned int log_prio;
+ /*
+ * Log I/O issuing time
+ */
+ unsigned int log_issue_time;
+
/*
* Max size of log entries before a chunk is compressed
*/
@@ -171,9 +177,14 @@ struct io_log {
* If the bit following prioity sample vit is set, we report both avg and max
*/
#define LOG_AVG_MAX_SAMPLE_BIT 0x20000000U
+/*
+ * If the bit following AVG_MAX_SAMPLE_BIT is set, we report the issue time also
+ */
+#define LOG_ISSUE_TIME_SAMPLE_BIT 0x10000000U
#define LOG_SAMPLE_BITS (LOG_OFFSET_SAMPLE_BIT | LOG_PRIO_SAMPLE_BIT |\
- LOG_AVG_MAX_SAMPLE_BIT)
+ LOG_AVG_MAX_SAMPLE_BIT |\
+ LOG_ISSUE_TIME_SAMPLE_BIT)
#define io_sample_ddir(io) ((io)->__ddir & ~LOG_SAMPLE_BITS)
static inline void io_sample_set_ddir(struct io_log *log,
@@ -183,19 +194,22 @@ static inline void io_sample_set_ddir(struct io_log *log,
io->__ddir = ddir | log->log_ddir_mask;
}
-static inline size_t __log_entry_sz(bool log_offset)
+static inline size_t __log_entry_sz(bool log_offset, bool log_issue_time)
{
size_t ret = sizeof(struct io_sample);
if (log_offset)
ret += sizeof(uint64_t);
+ if (log_issue_time)
+ ret += sizeof(uint64_t);
+
return ret;
}
static inline size_t log_entry_sz(struct io_log *log)
{
- return __log_entry_sz(log->log_offset);
+ return __log_entry_sz(log->log_offset, log->log_issue_time);
}
static inline size_t log_sample_sz(struct io_log *log, struct io_logs *cur_log)
@@ -203,10 +217,12 @@ static inline size_t log_sample_sz(struct io_log *log, struct io_logs *cur_log)
return cur_log->nr_samples * log_entry_sz(log);
}
-static inline struct io_sample *__get_sample(void *samples, int log_offset,
+static inline struct io_sample *__get_sample(void *samples, bool log_offset,
+ bool log_issue_time,
uint64_t sample)
{
- uint64_t sample_offset = sample * __log_entry_sz(log_offset);
+ uint64_t sample_offset = sample *
+ __log_entry_sz(log_offset, log_issue_time);
return (struct io_sample *) ((char *) samples + sample_offset);
}
@@ -219,7 +235,8 @@ static inline struct io_sample *get_sample(struct io_log *iolog,
struct io_logs *cur_log,
uint64_t sample)
{
- return __get_sample(cur_log->log, iolog->log_offset, sample);
+ return __get_sample(cur_log->log,
+ iolog->log_offset, iolog->log_issue_time, sample);
}
enum {
@@ -295,6 +312,7 @@ struct log_params {
int log_type;
int log_offset;
int log_prio;
+ int log_issue_time;
int log_gz;
int log_gz_store;
int log_compress;
diff --git a/options.c b/options.c
index ab650bb7..73fd4a7f 100644
--- a/options.c
+++ b/options.c
@@ -4727,6 +4727,16 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
.category = FIO_OPT_C_LOG,
.group = FIO_OPT_G_INVALID,
},
+ {
+ .name = "log_issue_time",
+ .lname = "Log IO issue time",
+ .type = FIO_OPT_BOOL,
+ .off1 = offsetof(struct thread_options, log_issue_time),
+ .help = "Include IO issue time for each log entry",
+ .def = "0",
+ .category = FIO_OPT_C_LOG,
+ .group = FIO_OPT_G_INVALID,
+ },
#ifdef CONFIG_ZLIB
{
.name = "log_compression",
diff --git a/server.c b/server.c
index dbb9eb5e..5967f421 100644
--- a/server.c
+++ b/server.c
@@ -2298,6 +2298,10 @@ int fio_send_iolog(struct thread_data *td, struct io_log *log, const char *name)
if (log->log_offset)
s->aux[IOS_AUX_OFFSET_INDEX] =
cpu_to_le64(s->aux[IOS_AUX_OFFSET_INDEX]);
+
+ if (log->log_issue_time)
+ s->aux[IOS_AUX_ISSUE_TIME_INDEX] =
+ cpu_to_le64(s->aux[IOS_AUX_ISSUE_TIME_INDEX]);
}
}
diff --git a/server.h b/server.h
index a8e4dbf2..2157da97 100644
--- a/server.h
+++ b/server.h
@@ -197,6 +197,7 @@ struct cmd_iolog_pdu {
uint32_t compressed;
uint32_t log_offset;
uint32_t log_prio;
+ uint32_t log_issue_time;
uint32_t log_hist_coarseness;
uint32_t per_job_logs;
uint8_t name[FIO_NET_NAME_MAX];
diff --git a/stat.c b/stat.c
index 56d04e7a..2cfd6819 100644
--- a/stat.c
+++ b/stat.c
@@ -30,6 +30,7 @@ struct log_sample {
uint64_t bs;
uint64_t offset;
uint16_t priority;
+ uint64_t issue_time;
};
struct fio_sem *stat_sem;
@@ -3066,6 +3067,9 @@ static void __add_log_sample(struct io_log *iolog, unsigned long t,
if (iolog->log_offset)
s->aux[IOS_AUX_OFFSET_INDEX] = sample->offset;
+ if (iolog->log_issue_time)
+ s->aux[IOS_AUX_ISSUE_TIME_INDEX] = sample->issue_time;
+
cur_log->nr_samples++;
return;
}
@@ -3160,7 +3164,7 @@ static void __add_stat_to_log(struct io_log *iolog, enum fio_ddir ddir,
* had actual samples done.
*/
if (iolog->avg_window[ddir].samples) {
- struct log_sample sample = { {{ 0, 0 }}, ddir, 0, 0, 0 };
+ struct log_sample sample = { {{ 0, 0 }}, ddir, 0, 0, 0, 0 };
union io_sample_data *d = &sample.data;
if (log_max == IO_LOG_SAMPLE_AVG) {
@@ -3258,7 +3262,7 @@ void add_agg_sample(union io_sample_data data, enum fio_ddir ddir,
unsigned long long bs)
{
struct io_log *iolog;
- struct log_sample sample = { data, ddir, bs, 0, 0 };
+ struct log_sample sample = { data, ddir, bs, 0, 0, 0 };
if (!ddir_rw(ddir))
return;
@@ -3336,7 +3340,8 @@ void add_clat_sample(struct thread_data *td, enum fio_ddir ddir,
if (td->clat_log) {
struct log_sample sample = { sample_val(nsec), ddir, bs,
- offset, ioprio };
+ offset, ioprio,
+ ntime_since(&td->epoch, &io_u->issue_time) };
add_log_sample(td, td->clat_log, &sample);
}
@@ -3366,7 +3371,7 @@ void add_clat_sample(struct thread_data *td, enum fio_ddir ddir,
uint64_t *io_u_plat;
struct io_u_plat_entry *dst;
struct log_sample sample = { {{ 0, 0 }}, ddir, bs,
- offset, ioprio };
+ offset, ioprio, 0 };
/*
* Make a byte-for-byte copy of the latency histogram
@@ -3418,7 +3423,8 @@ void add_slat_sample(struct thread_data *td, struct io_u *io_u)
if (td->slat_log) {
struct log_sample sample = { sample_val(nsec), ddir,
- io_u->xfer_buflen, io_u->offset, io_u->ioprio };
+ io_u->xfer_buflen, io_u->offset, io_u->ioprio,
+ ntime_since(&td->epoch, &io_u->issue_time) };
add_log_sample(td, td->slat_log, &sample);
}
@@ -3447,7 +3453,7 @@ void add_lat_sample(struct thread_data *td, enum fio_ddir ddir,
if (td->lat_log) {
struct log_sample sample = { sample_val(nsec), ddir, bs,
- io_u->offset, io_u->ioprio };
+ io_u->offset, io_u->ioprio, 0 };
add_log_sample(td, td->lat_log, &sample);
}
@@ -3492,7 +3498,7 @@ void add_bw_sample(struct thread_data *td, struct io_u *io_u,
if (td->bw_log) {
struct log_sample sample = { sample_val(rate), io_u->ddir,
- bytes, io_u->offset, io_u->ioprio };
+ bytes, io_u->offset, io_u->ioprio, 0 };
add_log_sample(td, td->bw_log, &sample);
}
@@ -3545,7 +3551,7 @@ static int __add_samples(struct thread_data *td, struct timespec *parent_tv,
if (log) {
struct log_sample sample = {
- sample_val(rate), ddir, 0, 0, 0 };
+ sample_val(rate), ddir, 0, 0, 0, 0 };
if (td->o.min_bs[ddir] == td->o.max_bs[ddir])
sample.bs = td->o.min_bs[ddir];
@@ -3589,7 +3595,7 @@ void add_iops_sample(struct thread_data *td, struct io_u *io_u,
if (td->iops_log) {
struct log_sample sample = { sample_val(1), io_u->ddir, bytes,
- io_u->offset, io_u->ioprio };
+ io_u->offset, io_u->ioprio, 0 };
add_log_sample(td, td->iops_log, &sample);
}
diff --git a/thread_options.h b/thread_options.h
index 20fc18a3..ee1e5b31 100644
--- a/thread_options.h
+++ b/thread_options.h
@@ -400,6 +400,7 @@ struct thread_options {
unsigned int log_entries;
unsigned int log_prio;
+ unsigned int log_issue_time;
};
#define FIO_TOP_STR_MAX 256
@@ -708,6 +709,7 @@ struct thread_options_pack {
uint32_t log_entries;
uint32_t log_prio;
+ uint32_t log_issue_time;
uint32_t fdp;
uint32_t dp_type;
--
2.45.2
^ permalink raw reply related [flat|nested] 25+ messages in thread* Re: [PATCH 5/9] introduce the log_issue_time option
2024-08-27 7:08 ` [PATCH 5/9] introduce the log_issue_time option Shin'ichiro Kawasaki
@ 2024-08-28 17:02 ` Jens Axboe
2024-08-29 5:50 ` Shinichiro Kawasaki
2024-08-28 17:38 ` Nick Neumann
2024-08-29 8:13 ` Shinichiro Kawasaki
2 siblings, 1 reply; 25+ messages in thread
From: Jens Axboe @ 2024-08-28 17:02 UTC (permalink / raw)
To: Shin'ichiro Kawasaki, fio, Vincent Fu; +Cc: Damien Le Moal
This one needs a FIO_SERVER_VER increment.
--
Jens Axboe
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 5/9] introduce the log_issue_time option
2024-08-27 7:08 ` [PATCH 5/9] introduce the log_issue_time option Shin'ichiro Kawasaki
2024-08-28 17:02 ` Jens Axboe
@ 2024-08-28 17:38 ` Nick Neumann
2024-08-29 1:05 ` Damien Le Moal
2024-08-29 8:13 ` Shinichiro Kawasaki
2 siblings, 1 reply; 25+ messages in thread
From: Nick Neumann @ 2024-08-28 17:38 UTC (permalink / raw)
To: Shin'ichiro Kawasaki; +Cc: fio, Jens Axboe, Vincent Fu, Damien Le Moal
On Tue, Aug 27, 2024 at 2:09 AM Shin'ichiro Kawasaki
<shinichiro.kawasaki@wdc.com> wrote:
>
> When write_lat_log option is set, fio outputs the 'clat' completion
> latency log file. This clat log can be used to analyze IO latency. This
> file can also be considered as an IO trace file as each IO entry can
> fully describe an IO when the --log_offset and --log_prio options are
> also used.
>
> However, using the clat log file as an IO trace is inaccuate due to two
> reasons. Firstly, the time field of each entry uses millisecond units,
> which is too coarse for fast IOs (e.g. when using SSDs). Secondly, the
> time field value is recorded not at command completion, but at log
> sample recording. The time field value is slightly different from the IO
> completion time. It can be used only as an approximated completion time.
>
> To analyze IO issue time and IO completion time accurately using the
> clat log, introduce the new option 'log_issue_time'. When this option is
> set, add another field to the log file entries and put the IO issue time
> in nanosecond to the field. The IO completion time can be calculated by
> adding the completion latency to the IO issue time.
>
> The IO issue time field is added to 'slat' submit latency log file also.
> This helps to calculate IO start time by subtracting the submission
> latency from the IO issue time.
>
> The log_issue_time option can be used for IO trace when the
> write_lat_log option and the log_offset options are set together. When
> the log_issue_time option is set but the write_lat_log option or the
> log_offset option is not set, fio errors out. When the log_issue_time
> option and the write_lat_log option are set together with other
> write_X_log options, the IO issue time field is added to all log files.
> As for the other log files than clat and slat log, the IO issue time
> does not have meaning then '0' is set to the field. When log_avg_msec
> option is set, average of the log values of the specified duration is
> logged. The IO issue time does not have meaning in this case either and
> '0' is set to the field.
>
> Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
> ---
> cconv.c | 2 ++
> client.c | 30 ++++++++++++++++++++----------
> init.c | 12 ++++++++++++
> iolog.c | 29 +++++++++++++++++++++--------
> iolog.h | 30 ++++++++++++++++++++++++------
> options.c | 10 ++++++++++
> server.c | 4 ++++
> server.h | 1 +
> stat.c | 24 +++++++++++++++---------
> thread_options.h | 2 ++
> 10 files changed, 111 insertions(+), 33 deletions(-)
>
> diff --git a/cconv.c b/cconv.c
> index 1d494982..9571f1a8 100644
> --- a/cconv.c
> +++ b/cconv.c
> @@ -216,6 +216,7 @@ int convert_thread_options_to_cpu(struct thread_options *o,
> o->log_max = le32_to_cpu(top->log_max);
> o->log_offset = le32_to_cpu(top->log_offset);
> o->log_prio = le32_to_cpu(top->log_prio);
> + o->log_issue_time = le32_to_cpu(top->log_issue_time);
> o->log_gz = le32_to_cpu(top->log_gz);
> o->log_gz_store = le32_to_cpu(top->log_gz_store);
> o->log_alternate_epoch = le32_to_cpu(top->log_alternate_epoch);
> @@ -458,6 +459,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
> top->log_max = cpu_to_le32(o->log_max);
> top->log_offset = cpu_to_le32(o->log_offset);
> top->log_prio = cpu_to_le32(o->log_prio);
> + top->log_issue_time = cpu_to_le32(o->log_issue_time);
> top->log_gz = cpu_to_le32(o->log_gz);
> top->log_gz_store = cpu_to_le32(o->log_gz_store);
> top->log_alternate_epoch = cpu_to_le32(o->log_alternate_epoch);
> diff --git a/client.c b/client.c
> index 0b6044dd..5964faba 100644
> --- a/client.c
> +++ b/client.c
> @@ -1388,8 +1388,8 @@ static void handle_eta(struct fio_client *client, struct fio_net_cmd *cmd)
> static void client_flush_hist_samples(FILE *f, int hist_coarseness, void *samples,
> uint64_t sample_size)
> {
> - struct io_sample *s;
> - int log_offset;
> + struct io_sample *s, *s_tmp;
> + bool log_offset, log_issue_time;
> uint64_t i, j, nr_samples;
> struct io_u_plat_entry *entry;
> uint64_t *io_u_plat;
> @@ -1399,15 +1399,17 @@ static void client_flush_hist_samples(FILE *f, int hist_coarseness, void *sample
> if (!sample_size)
> return;
>
> - s = __get_sample(samples, 0, 0);
> + s = __get_sample(samples, 0, 0, 0);
> log_offset = (s->__ddir & LOG_OFFSET_SAMPLE_BIT) != 0;
> + log_issue_time = (s->__ddir & LOG_ISSUE_TIME_SAMPLE_BIT) != 0;
>
> - nr_samples = sample_size / __log_entry_sz(log_offset);
> + nr_samples = sample_size / __log_entry_sz(log_offset, log_issue_time);
>
> for (i = 0; i < nr_samples; i++) {
>
> - s = (struct io_sample *)((char *)__get_sample(samples, log_offset, i) +
> - i * sizeof(struct io_u_plat_entry));
> + s_tmp = __get_sample(samples, log_offset, log_issue_time, i);
> + s = (struct io_sample *)((char *)s_tmp +
> + i * sizeof(struct io_u_plat_entry));
>
> entry = s->data.plat_entry;
> io_u_plat = entry->io_u_plat;
> @@ -1595,6 +1597,7 @@ static struct cmd_iolog_pdu *convert_iolog_gz(struct fio_net_cmd *cmd,
> uint64_t nr_samples;
> size_t total;
> char *p;
> + size_t log_entry_size;
>
> stream.zalloc = Z_NULL;
> stream.zfree = Z_NULL;
> @@ -1610,11 +1613,13 @@ static struct cmd_iolog_pdu *convert_iolog_gz(struct fio_net_cmd *cmd,
> */
> nr_samples = le64_to_cpu(pdu->nr_samples);
>
> + log_entry_size = __log_entry_sz(le32_to_cpu(pdu->log_offset),
> + le32_to_cpu(pdu->log_issue_time));
> if (pdu->log_type == IO_LOG_TYPE_HIST)
> - total = nr_samples * (__log_entry_sz(le32_to_cpu(pdu->log_offset)) +
> - sizeof(struct io_u_plat_entry));
> + total = nr_samples * (log_entry_size +
> + sizeof(struct io_u_plat_entry));
> else
> - total = nr_samples * __log_entry_sz(le32_to_cpu(pdu->log_offset));
> + total = nr_samples * log_entry_size;
> ret = malloc(total + sizeof(*pdu));
> ret->nr_samples = nr_samples;
>
> @@ -1703,6 +1708,7 @@ static struct cmd_iolog_pdu *convert_iolog(struct fio_net_cmd *cmd,
> ret->compressed = le32_to_cpu(ret->compressed);
> ret->log_offset = le32_to_cpu(ret->log_offset);
> ret->log_prio = le32_to_cpu(ret->log_prio);
> + ret->log_issue_time = le32_to_cpu(ret->log_issue_time);
> ret->log_hist_coarseness = le32_to_cpu(ret->log_hist_coarseness);
> ret->per_job_logs = le32_to_cpu(ret->per_job_logs);
>
> @@ -1713,7 +1719,7 @@ static struct cmd_iolog_pdu *convert_iolog(struct fio_net_cmd *cmd,
> for (i = 0; i < ret->nr_samples; i++) {
> struct io_sample *s;
>
> - s = __get_sample(samples, ret->log_offset, i);
> + s = __get_sample(samples, ret->log_offset, ret->log_issue_time, i);
> if (ret->log_type == IO_LOG_TYPE_HIST)
> s = (struct io_sample *)((char *)s + sizeof(struct io_u_plat_entry) * i);
>
> @@ -1730,6 +1736,10 @@ static struct cmd_iolog_pdu *convert_iolog(struct fio_net_cmd *cmd,
> s->aux[IOS_AUX_OFFSET_INDEX] =
> le64_to_cpu(s->aux[IOS_AUX_OFFSET_INDEX]);
>
> + if (ret->log_issue_time)
> + s->aux[IOS_AUX_ISSUE_TIME_INDEX] =
> + le64_to_cpu(s->aux[IOS_AUX_ISSUE_TIME_INDEX]);
> +
> if (ret->log_type == IO_LOG_TYPE_HIST) {
> s->data.plat_entry = (struct io_u_plat_entry *)(((char *)s) + sizeof(*s));
> s->data.plat_entry->list.next = NULL;
> diff --git a/init.c b/init.c
> index ff3e9a90..3b213c0e 100644
> --- a/init.c
> +++ b/init.c
> @@ -1621,12 +1621,18 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
> .log_type = IO_LOG_TYPE_LAT,
> .log_offset = o->log_offset,
> .log_prio = o->log_prio,
> + .log_issue_time = o->log_issue_time,
> .log_gz = o->log_gz,
> .log_gz_store = o->log_gz_store,
> };
> const char *pre = make_log_name(o->lat_log_file, o->name);
> const char *suf;
>
> + if (o->log_issue_time && !o->log_offset) {
> + log_err("fio: log_issue_time option requires log_write_lat_log and log_offset options\n");
> + goto err;
> + }
> +
> if (p.log_gz_store)
> suf = "log.fz";
> else
> @@ -1650,6 +1656,9 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
> setup_log(&td->clat_log, &p, logname);
> }
>
> + } else if (o->log_issue_time) {
> + log_err("fio: log_issue_time option requires log_write_lat_log and log_offset options\n");
> + goto err;
> }
>
> if (o->write_hist_log) {
> @@ -1661,6 +1670,7 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
> .log_type = IO_LOG_TYPE_HIST,
> .log_offset = o->log_offset,
> .log_prio = o->log_prio,
> + .log_issue_time = o->log_issue_time,
> .log_gz = o->log_gz,
> .log_gz_store = o->log_gz_store,
> };
> @@ -1693,6 +1703,7 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
> .log_type = IO_LOG_TYPE_BW,
> .log_offset = o->log_offset,
> .log_prio = o->log_prio,
> + .log_issue_time = o->log_issue_time,
> .log_gz = o->log_gz,
> .log_gz_store = o->log_gz_store,
> };
> @@ -1725,6 +1736,7 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
> .log_type = IO_LOG_TYPE_IOPS,
> .log_offset = o->log_offset,
> .log_prio = o->log_prio,
> + .log_issue_time = o->log_issue_time,
> .log_gz = o->log_gz,
> .log_gz_store = o->log_gz_store,
> };
> diff --git a/iolog.c b/iolog.c
> index bb1ade86..aadb0d05 100644
> --- a/iolog.c
> +++ b/iolog.c
> @@ -845,6 +845,7 @@ void setup_log(struct io_log **log, struct log_params *p,
> l->log_type = p->log_type;
> l->log_offset = p->log_offset;
> l->log_prio = p->log_prio;
> + l->log_issue_time = p->log_issue_time;
> l->log_gz = p->log_gz;
> l->log_gz_store = p->log_gz_store;
> l->avg_msec = p->avg_msec;
> @@ -887,6 +888,9 @@ void setup_log(struct io_log **log, struct log_params *p,
> if (l->td && l->td->o.log_max == IO_LOG_SAMPLE_BOTH)
> l->log_ddir_mask |= LOG_AVG_MAX_SAMPLE_BIT;
>
> + if (l->log_issue_time)
> + l->log_ddir_mask |= LOG_ISSUE_TIME_SAMPLE_BIT;
> +
> INIT_FLIST_HEAD(&l->chunk_list);
>
> if (l->log_gz && !p->td)
> @@ -969,7 +973,7 @@ static void flush_hist_samples(FILE *f, int hist_coarseness, void *samples,
> uint64_t sample_size)
> {
> struct io_sample *s;
> - int log_offset;
> + bool log_offset, log_issue_time;
> uint64_t i, j, nr_samples;
> struct io_u_plat_entry *entry, *entry_before;
> uint64_t *io_u_plat;
> @@ -980,13 +984,14 @@ static void flush_hist_samples(FILE *f, int hist_coarseness, void *samples,
> if (!sample_size)
> return;
>
> - s = __get_sample(samples, 0, 0);
> + s = __get_sample(samples, 0, 0, 0);
> log_offset = (s->__ddir & LOG_OFFSET_SAMPLE_BIT) != 0;
> + log_issue_time = (s->__ddir & LOG_ISSUE_TIME_SAMPLE_BIT) != 0;
>
> - nr_samples = sample_size / __log_entry_sz(log_offset);
> + nr_samples = sample_size / __log_entry_sz(log_offset, log_issue_time);
>
> for (i = 0; i < nr_samples; i++) {
> - s = __get_sample(samples, log_offset, i);
> + s = __get_sample(samples, log_offset, log_issue_time, i);
>
> entry = s->data.plat_entry;
> io_u_plat = entry->io_u_plat;
> @@ -1038,7 +1043,7 @@ static int print_sample_fields(char **p, size_t *left, const char *fmt, ...) {
> void flush_samples(FILE *f, void *samples, uint64_t sample_size)
> {
> struct io_sample *s;
> - bool log_offset, log_prio, log_avg_max;
> + bool log_offset, log_prio, log_avg_max, log_issue_time;
> uint64_t i, nr_samples;
> char buf[256];
> char *p;
> @@ -1048,15 +1053,16 @@ void flush_samples(FILE *f, void *samples, uint64_t sample_size)
> if (!sample_size)
> return;
>
> - s = __get_sample(samples, 0, 0);
> + s = __get_sample(samples, 0, 0, 0);
> log_offset = (s->__ddir & LOG_OFFSET_SAMPLE_BIT) != 0;
> log_prio = (s->__ddir & LOG_PRIO_SAMPLE_BIT) != 0;
> log_avg_max = (s->__ddir & LOG_AVG_MAX_SAMPLE_BIT) != 0;
> + log_issue_time = (s->__ddir & LOG_ISSUE_TIME_SAMPLE_BIT) != 0;
>
> - nr_samples = sample_size / __log_entry_sz(log_offset);
> + nr_samples = sample_size / __log_entry_sz(log_offset, log_issue_time);
>
> for (i = 0; i < nr_samples; i++) {
> - s = __get_sample(samples, log_offset, i);
> + s = __get_sample(samples, log_offset, log_issue_time, i);
> p = buf;
> left = sizeof(buf);
>
> @@ -1094,6 +1100,13 @@ void flush_samples(FILE *f, void *samples, uint64_t sample_size)
> if (ret)
> return;
>
> + if (log_issue_time) {
> + ret = print_sample_fields(&p, &left, ", %llu",
> + (unsigned long long) s->aux[IOS_AUX_ISSUE_TIME_INDEX]);
> + if (ret)
> + return;
> + }
> +
> fprintf(f, "%s\n", buf);
> }
> }
> diff --git a/iolog.h b/iolog.h
> index d868921b..e864d169 100644
> --- a/iolog.h
> +++ b/iolog.h
> @@ -62,6 +62,7 @@ struct io_sample {
> */
> enum {
> IOS_AUX_OFFSET_INDEX,
> + IOS_AUX_ISSUE_TIME_INDEX,
> };
>
> enum {
> @@ -122,6 +123,11 @@ struct io_log {
> */
> unsigned int log_prio;
>
> + /*
> + * Log I/O issuing time
> + */
> + unsigned int log_issue_time;
> +
> /*
> * Max size of log entries before a chunk is compressed
> */
> @@ -171,9 +177,14 @@ struct io_log {
> * If the bit following prioity sample vit is set, we report both avg and max
> */
> #define LOG_AVG_MAX_SAMPLE_BIT 0x20000000U
> +/*
> + * If the bit following AVG_MAX_SAMPLE_BIT is set, we report the issue time also
> + */
> +#define LOG_ISSUE_TIME_SAMPLE_BIT 0x10000000U
>
> #define LOG_SAMPLE_BITS (LOG_OFFSET_SAMPLE_BIT | LOG_PRIO_SAMPLE_BIT |\
> - LOG_AVG_MAX_SAMPLE_BIT)
> + LOG_AVG_MAX_SAMPLE_BIT |\
> + LOG_ISSUE_TIME_SAMPLE_BIT)
> #define io_sample_ddir(io) ((io)->__ddir & ~LOG_SAMPLE_BITS)
>
> static inline void io_sample_set_ddir(struct io_log *log,
> @@ -183,19 +194,22 @@ static inline void io_sample_set_ddir(struct io_log *log,
> io->__ddir = ddir | log->log_ddir_mask;
> }
>
> -static inline size_t __log_entry_sz(bool log_offset)
> +static inline size_t __log_entry_sz(bool log_offset, bool log_issue_time)
> {
> size_t ret = sizeof(struct io_sample);
>
> if (log_offset)
> ret += sizeof(uint64_t);
>
> + if (log_issue_time)
> + ret += sizeof(uint64_t);
> +
> return ret;
> }
>
> static inline size_t log_entry_sz(struct io_log *log)
> {
> - return __log_entry_sz(log->log_offset);
> + return __log_entry_sz(log->log_offset, log->log_issue_time);
> }
>
> static inline size_t log_sample_sz(struct io_log *log, struct io_logs *cur_log)
> @@ -203,10 +217,12 @@ static inline size_t log_sample_sz(struct io_log *log, struct io_logs *cur_log)
> return cur_log->nr_samples * log_entry_sz(log);
> }
>
> -static inline struct io_sample *__get_sample(void *samples, int log_offset,
> +static inline struct io_sample *__get_sample(void *samples, bool log_offset,
> + bool log_issue_time,
> uint64_t sample)
> {
> - uint64_t sample_offset = sample * __log_entry_sz(log_offset);
> + uint64_t sample_offset = sample *
> + __log_entry_sz(log_offset, log_issue_time);
> return (struct io_sample *) ((char *) samples + sample_offset);
> }
>
> @@ -219,7 +235,8 @@ static inline struct io_sample *get_sample(struct io_log *iolog,
> struct io_logs *cur_log,
> uint64_t sample)
> {
> - return __get_sample(cur_log->log, iolog->log_offset, sample);
> + return __get_sample(cur_log->log,
> + iolog->log_offset, iolog->log_issue_time, sample);
> }
>
> enum {
> @@ -295,6 +312,7 @@ struct log_params {
> int log_type;
> int log_offset;
> int log_prio;
> + int log_issue_time;
> int log_gz;
> int log_gz_store;
> int log_compress;
> diff --git a/options.c b/options.c
> index ab650bb7..73fd4a7f 100644
> --- a/options.c
> +++ b/options.c
> @@ -4727,6 +4727,16 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
> .category = FIO_OPT_C_LOG,
> .group = FIO_OPT_G_INVALID,
> },
> + {
> + .name = "log_issue_time",
> + .lname = "Log IO issue time",
> + .type = FIO_OPT_BOOL,
> + .off1 = offsetof(struct thread_options, log_issue_time),
> + .help = "Include IO issue time for each log entry",
> + .def = "0",
> + .category = FIO_OPT_C_LOG,
> + .group = FIO_OPT_G_INVALID,
> + },
> #ifdef CONFIG_ZLIB
> {
> .name = "log_compression",
> diff --git a/server.c b/server.c
> index dbb9eb5e..5967f421 100644
> --- a/server.c
> +++ b/server.c
> @@ -2298,6 +2298,10 @@ int fio_send_iolog(struct thread_data *td, struct io_log *log, const char *name)
> if (log->log_offset)
> s->aux[IOS_AUX_OFFSET_INDEX] =
> cpu_to_le64(s->aux[IOS_AUX_OFFSET_INDEX]);
> +
> + if (log->log_issue_time)
> + s->aux[IOS_AUX_ISSUE_TIME_INDEX] =
> + cpu_to_le64(s->aux[IOS_AUX_ISSUE_TIME_INDEX]);
> }
> }
>
> diff --git a/server.h b/server.h
> index a8e4dbf2..2157da97 100644
> --- a/server.h
> +++ b/server.h
> @@ -197,6 +197,7 @@ struct cmd_iolog_pdu {
> uint32_t compressed;
> uint32_t log_offset;
> uint32_t log_prio;
> + uint32_t log_issue_time;
> uint32_t log_hist_coarseness;
> uint32_t per_job_logs;
> uint8_t name[FIO_NET_NAME_MAX];
> diff --git a/stat.c b/stat.c
> index 56d04e7a..2cfd6819 100644
> --- a/stat.c
> +++ b/stat.c
> @@ -30,6 +30,7 @@ struct log_sample {
> uint64_t bs;
> uint64_t offset;
> uint16_t priority;
> + uint64_t issue_time;
> };
>
> struct fio_sem *stat_sem;
> @@ -3066,6 +3067,9 @@ static void __add_log_sample(struct io_log *iolog, unsigned long t,
> if (iolog->log_offset)
> s->aux[IOS_AUX_OFFSET_INDEX] = sample->offset;
>
> + if (iolog->log_issue_time)
> + s->aux[IOS_AUX_ISSUE_TIME_INDEX] = sample->issue_time;
> +
> cur_log->nr_samples++;
> return;
> }
> @@ -3160,7 +3164,7 @@ static void __add_stat_to_log(struct io_log *iolog, enum fio_ddir ddir,
> * had actual samples done.
> */
> if (iolog->avg_window[ddir].samples) {
> - struct log_sample sample = { {{ 0, 0 }}, ddir, 0, 0, 0 };
> + struct log_sample sample = { {{ 0, 0 }}, ddir, 0, 0, 0, 0 };
> union io_sample_data *d = &sample.data;
>
> if (log_max == IO_LOG_SAMPLE_AVG) {
> @@ -3258,7 +3262,7 @@ void add_agg_sample(union io_sample_data data, enum fio_ddir ddir,
> unsigned long long bs)
> {
> struct io_log *iolog;
> - struct log_sample sample = { data, ddir, bs, 0, 0 };
> + struct log_sample sample = { data, ddir, bs, 0, 0, 0 };
>
> if (!ddir_rw(ddir))
> return;
> @@ -3336,7 +3340,8 @@ void add_clat_sample(struct thread_data *td, enum fio_ddir ddir,
>
> if (td->clat_log) {
> struct log_sample sample = { sample_val(nsec), ddir, bs,
> - offset, ioprio };
> + offset, ioprio,
> + ntime_since(&td->epoch, &io_u->issue_time) };
>
> add_log_sample(td, td->clat_log, &sample);
> }
> @@ -3366,7 +3371,7 @@ void add_clat_sample(struct thread_data *td, enum fio_ddir ddir,
> uint64_t *io_u_plat;
> struct io_u_plat_entry *dst;
> struct log_sample sample = { {{ 0, 0 }}, ddir, bs,
> - offset, ioprio };
> + offset, ioprio, 0 };
>
> /*
> * Make a byte-for-byte copy of the latency histogram
> @@ -3418,7 +3423,8 @@ void add_slat_sample(struct thread_data *td, struct io_u *io_u)
>
> if (td->slat_log) {
> struct log_sample sample = { sample_val(nsec), ddir,
> - io_u->xfer_buflen, io_u->offset, io_u->ioprio };
> + io_u->xfer_buflen, io_u->offset, io_u->ioprio,
> + ntime_since(&td->epoch, &io_u->issue_time) };
>
> add_log_sample(td, td->slat_log, &sample);
> }
> @@ -3447,7 +3453,7 @@ void add_lat_sample(struct thread_data *td, enum fio_ddir ddir,
>
> if (td->lat_log) {
> struct log_sample sample = { sample_val(nsec), ddir, bs,
> - io_u->offset, io_u->ioprio };
> + io_u->offset, io_u->ioprio, 0 };
>
> add_log_sample(td, td->lat_log, &sample);
> }
> @@ -3492,7 +3498,7 @@ void add_bw_sample(struct thread_data *td, struct io_u *io_u,
>
> if (td->bw_log) {
> struct log_sample sample = { sample_val(rate), io_u->ddir,
> - bytes, io_u->offset, io_u->ioprio };
> + bytes, io_u->offset, io_u->ioprio, 0 };
>
> add_log_sample(td, td->bw_log, &sample);
> }
> @@ -3545,7 +3551,7 @@ static int __add_samples(struct thread_data *td, struct timespec *parent_tv,
>
> if (log) {
> struct log_sample sample = {
> - sample_val(rate), ddir, 0, 0, 0 };
> + sample_val(rate), ddir, 0, 0, 0, 0 };
>
> if (td->o.min_bs[ddir] == td->o.max_bs[ddir])
> sample.bs = td->o.min_bs[ddir];
> @@ -3589,7 +3595,7 @@ void add_iops_sample(struct thread_data *td, struct io_u *io_u,
>
> if (td->iops_log) {
> struct log_sample sample = { sample_val(1), io_u->ddir, bytes,
> - io_u->offset, io_u->ioprio };
> + io_u->offset, io_u->ioprio, 0 };
>
> add_log_sample(td, td->iops_log, &sample);
> }
> diff --git a/thread_options.h b/thread_options.h
> index 20fc18a3..ee1e5b31 100644
> --- a/thread_options.h
> +++ b/thread_options.h
> @@ -400,6 +400,7 @@ struct thread_options {
>
> unsigned int log_entries;
> unsigned int log_prio;
> + unsigned int log_issue_time;
> };
>
> #define FIO_TOP_STR_MAX 256
> @@ -708,6 +709,7 @@ struct thread_options_pack {
>
> uint32_t log_entries;
> uint32_t log_prio;
> + uint32_t log_issue_time;
>
> uint32_t fdp;
> uint32_t dp_type;
> --
> 2.45.2
>
>
I am really excited to see this set of changes! I've been wanting (but
not had time) to have a single-file log output with all of the
information present, and this helps towards that while also achieving
its trace file goal.
Do you have any thoughts on eventually including the submit time or
submit latency in the clat file as well? Then one would have a single
file without any of the redundancies involved with multiple files.
I've also thought a bit about having a non-text but portable output
file option, like protobuf. The files are large, and gzip compresses
the per-I/O logs well, but I imagine protobuf would be a big win for
both outputting and ingesting the log files, and would be way smaller
than the non-compressed log files.
Thanks for the new feature and any input on my thoughts!
- Nick
^ permalink raw reply [flat|nested] 25+ messages in thread* Re: [PATCH 5/9] introduce the log_issue_time option
2024-08-28 17:38 ` Nick Neumann
@ 2024-08-29 1:05 ` Damien Le Moal
2024-08-29 1:44 ` Nick Neumann
0 siblings, 1 reply; 25+ messages in thread
From: Damien Le Moal @ 2024-08-29 1:05 UTC (permalink / raw)
To: Nick Neumann, Shin'ichiro Kawasaki; +Cc: fio, Jens Axboe, Vincent Fu
On 8/29/24 02:38, Nick Neumann wrote:
> Do you have any thoughts on eventually including the submit time or
> submit latency in the clat file as well? Then one would have a single
> file without any of the redundancies involved with multiple files.
The submit time is what Shin'ichiro patches are adding :)
issue time == submit time. Maybe we should change the name to log_submit_time ?
With the code refactoring/cleanup that Shin'ichiro did, the submit latency can
easily be added too. Though one could argue that the "clat" name for the log
then becomes a little inappropriate.
> I've also thought a bit about having a non-text but portable output
> file option, like protobuf. The files are large, and gzip compresses
> the per-I/O logs well, but I imagine protobuf would be a big win for
> both outputting and ingesting the log files, and would be way smaller
> than the non-compressed log files.
Adding an option to change the clat file format can be done on top of this series.
>
> Thanks for the new feature and any input on my thoughts!
> - Nick
--
Damien Le Moal
Western Digital Research
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 5/9] introduce the log_issue_time option
2024-08-29 1:05 ` Damien Le Moal
@ 2024-08-29 1:44 ` Nick Neumann
0 siblings, 0 replies; 25+ messages in thread
From: Nick Neumann @ 2024-08-29 1:44 UTC (permalink / raw)
To: Damien Le Moal; +Cc: Shin'ichiro Kawasaki, fio, Jens Axboe, Vincent Fu
On Wed, Aug 28, 2024 at 8:05 PM Damien Le Moal <dlemoal@kernel.org> wrote:
> The submit time is what Shin'ichiro patches are adding :)
> issue time == submit time. Maybe we should change the name to log_submit_time ?
>
> With the code refactoring/cleanup that Shin'ichiro did, the submit latency can
> easily be added too. Though one could argue that the "clat" name for the log
> then becomes a little inappropriate.
Oops, sorry - yes, I should have said submit latency, or start time.
Either one makes the slat and lat logs redundant, and puts all of the
info in one single nice file, which happens to be clat. :-)
> Adding an option to change the clat file format can be done on top of this series.
Definitely. With everything that may end up in "clat", I could see the
option morphing to *only* output the single clat file. Right now one
ends up with the lat and slat files too, which take up lots of
(arguably unnecessary) space in memory.
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 5/9] introduce the log_issue_time option
2024-08-27 7:08 ` [PATCH 5/9] introduce the log_issue_time option Shin'ichiro Kawasaki
2024-08-28 17:02 ` Jens Axboe
2024-08-28 17:38 ` Nick Neumann
@ 2024-08-29 8:13 ` Shinichiro Kawasaki
2 siblings, 0 replies; 25+ messages in thread
From: Shinichiro Kawasaki @ 2024-08-29 8:13 UTC (permalink / raw)
To: fio@vger.kernel.org, Jens Axboe, Vincent Fu; +Cc: Damien Le Moal
On Aug 27, 2024 / 16:08, Shin'ichiro Kawasaki wrote:
[...]
> --- a/init.c
> +++ b/init.c
> @@ -1621,12 +1621,18 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
> .log_type = IO_LOG_TYPE_LAT,
> .log_offset = o->log_offset,
> .log_prio = o->log_prio,
> + .log_issue_time = o->log_issue_time,
> .log_gz = o->log_gz,
> .log_gz_store = o->log_gz_store,
> };
> const char *pre = make_log_name(o->lat_log_file, o->name);
> const char *suf;
>
> + if (o->log_issue_time && !o->log_offset) {
> + log_err("fio: log_issue_time option requires log_write_lat_log and log_offset options\n");
While I prepare v2, I found a typo here: s/log_write_lat_log/write_lat_log/
> + goto err;
> + }
> +
> if (p.log_gz_store)
> suf = "log.fz";
> else
> @@ -1650,6 +1656,9 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
> setup_log(&td->clat_log, &p, logname);
> }
>
> + } else if (o->log_issue_time) {
> + log_err("fio: log_issue_time option requires log_write_lat_log and log_offset options\n");
Same here. Will fix them.
^ permalink raw reply [flat|nested] 25+ messages in thread
* [PATCH 6/9] doc: fix the descriptions of the log_prio option
2024-08-27 7:08 [PATCH 0/9] introduce the log_issue_time option Shin'ichiro Kawasaki
` (4 preceding siblings ...)
2024-08-27 7:08 ` [PATCH 5/9] introduce the log_issue_time option Shin'ichiro Kawasaki
@ 2024-08-27 7:08 ` Shin'ichiro Kawasaki
2024-08-27 7:08 ` [PATCH 7/9] doc: describe the log_issue_time option Shin'ichiro Kawasaki
` (3 subsequent siblings)
9 siblings, 0 replies; 25+ messages in thread
From: Shin'ichiro Kawasaki @ 2024-08-27 7:08 UTC (permalink / raw)
To: fio, Jens Axboe, Vincent Fu; +Cc: Damien Le Moal, Shin'ichiro Kawasaki
Commit 03ec570f6e57 ("fio: Introduce the log_prio option") added the
description of the log_prio option to fio.1. However, the description
was wrong. It mentioned that the log_prio option would change the number
of fields in the Log File Format, but actually it does not do so. Fix
the description.
Also, the commit did not update HOWTO.rst for the log_prio option. To
keep HOWTO.rst same as fio.1, add the missing descriptions to HOWTO.rst.
Fixes: 03ec570f6e57 ("fio: Introduce the log_prio option")
Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
---
HOWTO.rst | 19 +++++++++++++++++--
fio.1 | 7 ++++---
2 files changed, 21 insertions(+), 5 deletions(-)
diff --git a/HOWTO.rst b/HOWTO.rst
index 11b1c669..49a28464 100644
--- a/HOWTO.rst
+++ b/HOWTO.rst
@@ -4237,6 +4237,13 @@ Measurements and reporting
entry as well as the other data values. Defaults to 0 meaning that
offsets are not present in logs. Also see `Log File Formats`_.
+.. option:: log_prio=bool
+
+ If this is set, the *Command priority* field in `Log File Formats`_
+ shows the priority value and the IO priority class of the command.
+ Otherwise, the field shows if the command has the highest RT
+ priority class or not. Also see `Log File Formats`_.
+
.. option:: log_compression=int
If this is set, fio will compress the I/O logs as it goes, to keep the
@@ -5227,8 +5234,16 @@ The entry's *block size* is always in bytes. The *offset* is the position in byt
from the start of the file for that particular I/O. The logging of the offset can be
toggled with :option:`log_offset`.
-*Command priority* is 0 for normal priority and 1 for high priority. This is controlled
-by the ioengine specific :option:`cmdprio_percentage`.
+If :option:`log_prio` is not set, the entry's *Command priority* is 1 for an IO
+executed with the highest RT priority class (:option:`prioclass` =1 or
+:option:`cmdprio_class` =1) and 0 otherwise. This is controlled by the
+:option:`prioclass` option and the ioengine specific
+:option:`cmdprio_percentage` :option:`cmdprio_class` options. If
+:option:`log_prio` is set, the entry's *Command priority* is the priority set
+for the IO, as a 16-bits hexadecimal number with the lowest 13 bits indicating
+the priority value (:option:`prio` and :option:`cmdprio` options) and the
+highest 3 bits indicating the IO priority class (:option:`prioclass` and
+:option:`cmdprio_class` options).
Fio defaults to logging every individual I/O but when windowed logging is set
through :option:`log_avg_msec`, either the average (by default), the maximum
diff --git a/fio.1 b/fio.1
index 6db76b2c..0aabe89e 100644
--- a/fio.1
+++ b/fio.1
@@ -3943,9 +3943,10 @@ entry as well as the other data values. Defaults to 0 meaning that
offsets are not present in logs. Also see \fBLOG FILE FORMATS\fR section.
.TP
.BI log_prio \fR=\fPbool
-If this is set, the iolog options will include the I/O priority for the I/O
-entry as well as the other data values. Defaults to 0 meaning that
-I/O priorities are not present in logs. Also see \fBLOG FILE FORMATS\fR section.
+If this is set, the `Command priority` field in \fBLOG FILE FORMATS\fR
+shows the priority value and the IO priority class of the command.
+Otherwise, the field shows if the command has the highest RT priority
+class or not. Also see \fBLOG FILE FORMATS\fR section.
.TP
.BI log_compression \fR=\fPint
If this is set, fio will compress the I/O logs as it goes, to keep the
--
2.45.2
^ permalink raw reply related [flat|nested] 25+ messages in thread* [PATCH 7/9] doc: describe the log_issue_time option
2024-08-27 7:08 [PATCH 0/9] introduce the log_issue_time option Shin'ichiro Kawasaki
` (5 preceding siblings ...)
2024-08-27 7:08 ` [PATCH 6/9] doc: fix the descriptions of the log_prio option Shin'ichiro Kawasaki
@ 2024-08-27 7:08 ` Shin'ichiro Kawasaki
2024-08-27 7:08 ` [PATCH 8/9] t/jobs/t0033: add test for the log file format Shin'ichiro Kawasaki
` (2 subsequent siblings)
9 siblings, 0 replies; 25+ messages in thread
From: Shin'ichiro Kawasaki @ 2024-08-27 7:08 UTC (permalink / raw)
To: fio, Jens Axboe, Vincent Fu; +Cc: Damien Le Moal, Shin'ichiro Kawasaki
The recent commit introduced the new option log_issue_time. Describe its
feature and restrictions.
Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
---
HOWTO.rst | 21 +++++++++++++++++----
fio.1 | 19 +++++++++++++++----
2 files changed, 32 insertions(+), 8 deletions(-)
diff --git a/HOWTO.rst b/HOWTO.rst
index 49a28464..49486cfd 100644
--- a/HOWTO.rst
+++ b/HOWTO.rst
@@ -4244,6 +4244,14 @@ Measurements and reporting
Otherwise, the field shows if the command has the highest RT
priority class or not. Also see `Log File Formats`_.
+.. option:: log_issue_time=bool
+
+ If this is set, the iolog options will include the command issue time
+ for the I/O entry as well as the other data values. Defaults to 0
+ meaning that command issue times are not present in logs. Also see
+ `Log File Formats`_. This option shall be set together with
+ :option:`write_lat_log` and :option:`log_offset`.
+
.. option:: log_compression=int
If this is set, fio will compress the I/O logs as it goes, to keep the
@@ -5209,7 +5217,7 @@ Fio supports a variety of log file formats, for logging latencies, bandwidth,
and IOPS. The logs share a common format, which looks like this:
*time* (`msec`), *value*, *data direction*, *block size* (`bytes`),
- *offset* (`bytes`), *command priority*
+ *offset* (`bytes`), *command priority*, *issue time* (`nsec`)
*Time* for the log entry is always in milliseconds. The *value* logged depends
on the type of log, it will be one of the following:
@@ -5245,6 +5253,11 @@ the priority value (:option:`prio` and :option:`cmdprio` options) and the
highest 3 bits indicating the IO priority class (:option:`prioclass` and
:option:`cmdprio_class` options).
+The entry's *issue time* is the command issue time in nanoseconds. The logging
+of the issue time can be toggled with :option:`log_issue_time`. This field has
+valid values in completion latency log file (clat), or submit latency log file
+(slat). The field has value 0 in other logs files.
+
Fio defaults to logging every individual I/O but when windowed logging is set
through :option:`log_avg_msec`, either the average (by default), the maximum
(:option:`log_window_value` is set to max) *value* seen over the specified period
@@ -5253,12 +5266,12 @@ is set to both) is recorded. The log file format when both the values are report
takes this form:
*time* (`msec`), *value*, *value1*, *data direction*, *block size* (`bytes`),
- *offset* (`bytes`), *command priority*
+ *offset* (`bytes`), *command priority*, *issue time* (`nsec`)
Each *data direction* seen within the window period will aggregate its values in a
-separate row. Further, when using windowed logging the *block size* and *offset*
-entries will always contain 0.
+separate row. Further, when using windowed logging the *block size*, *offset*
+and *issue time* entries will always contain 0.
Client/Server
diff --git a/fio.1 b/fio.1
index 0aabe89e..29e5ce95 100644
--- a/fio.1
+++ b/fio.1
@@ -3948,6 +3948,12 @@ shows the priority value and the IO priority class of the command.
Otherwise, the field shows if the command has the highest RT priority
class or not. Also see \fBLOG FILE FORMATS\fR section.
.TP
+.BI log_issue_time \fR=\fPbool
+If this is set, the iolog options will include the command issue time for the
+I/O entry as well as the other data values. Defaults to 0 meaning that command
+issue times are not present in logs. Also see \fBLOG FILE FORMATS\fR section.
+This option shall be set together with \fBwrite_lat_log\fR and \fBlog_offset\fR.
+.TP
.BI log_compression \fR=\fPint
If this is set, fio will compress the I/O logs as it goes, to keep the
memory footprint lower. When a log reaches the specified size, that chunk is
@@ -4943,7 +4949,7 @@ and IOPS. The logs share a common format, which looks like this:
.RS
.P
time (msec), value, data direction, block size (bytes), offset (bytes),
-command priority
+command priority, issue time (nsec)
.RE
.P
`Time' for the log entry is always in milliseconds. The `value' logged depends
@@ -4986,6 +4992,11 @@ number with the lowest 13 bits indicating the priority value (\fBprio\fR and
\fBcmdprio\fR options) and the highest 3 bits indicating the IO priority class
(\fBprioclass\fR and \fBcmdprio_class\fR options).
.P
+The entry's `issue time` is the command issue time in nanoseconds. The logging
+of the issue time can be toggled with \fBlog_issue_time\fR. This field has valid
+values in completion latency log file (clat), or submit latency log file (slat).
+The field has value 0 in other log files.
+.P
Fio defaults to logging every individual I/O but when windowed logging is set
through \fBlog_avg_msec\fR, either the average (by default), the maximum
(\fBlog_window_value\fR is set to max) `value' seen over the specified period of
@@ -4995,12 +5006,12 @@ takes this form:
.RS
.P
time (msec), value, value1, data direction, block size (bytes), offset (bytes),
-command priority
+command priority, issue time (nsec)
.RE
.P
Each `data direction' seen within the window period will aggregate its values
-in a separate row. Further, when using windowed logging the `block size' and
-`offset' entries will always contain 0.
+in a separate row. Further, when using windowed logging the `block size',
+`offset' and `issue time` entries will always contain 0.
.SH CLIENT / SERVER
Normally fio is invoked as a stand-alone application on the machine where the
I/O workload should be generated. However, the backend and frontend of fio can
--
2.45.2
^ permalink raw reply related [flat|nested] 25+ messages in thread* [PATCH 8/9] t/jobs/t0033: add test for the log file format
2024-08-27 7:08 [PATCH 0/9] introduce the log_issue_time option Shin'ichiro Kawasaki
` (6 preceding siblings ...)
2024-08-27 7:08 ` [PATCH 7/9] doc: describe the log_issue_time option Shin'ichiro Kawasaki
@ 2024-08-27 7:08 ` Shin'ichiro Kawasaki
2024-08-28 18:46 ` Vincent Fu
2024-08-27 7:08 ` [PATCH 9/9] t/jobs/t0034: add test for the log_issue_time option Shin'ichiro Kawasaki
2024-08-28 17:06 ` [PATCH 0/9] introduce " Jens Axboe
9 siblings, 1 reply; 25+ messages in thread
From: Shin'ichiro Kawasaki @ 2024-08-27 7:08 UTC (permalink / raw)
To: fio, Jens Axboe, Vincent Fu; +Cc: Damien Le Moal, Shin'ichiro Kawasaki
Add a test to check the log file format which is described in the "Log
File Format" section in HOWTO.rst. Generate log files using combination
of options relevant to log files, and check that lines in log files have
the format expected. This test helps to confirm that the changes in the
log file related functions do not cause regressions.
Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
---
t/jobs/t0033.fio | 28 ++++++++++++++++++++++++++++
t/run-fio-tests.py | 35 +++++++++++++++++++++++++++++++++++
2 files changed, 63 insertions(+)
create mode 100644 t/jobs/t0033.fio
diff --git a/t/jobs/t0033.fio b/t/jobs/t0033.fio
new file mode 100644
index 00000000..038e388f
--- /dev/null
+++ b/t/jobs/t0033.fio
@@ -0,0 +1,28 @@
+[global]
+rw=read
+filename=t0033file
+size=8k
+time_based
+runtime=100ms
+ioengine=libaio
+iodepth=1
+
+[job1]
+write_bw_log=log
+log_prio=1
+
+[job2]
+write_lat_log=log
+log_avg_msec=20
+log_window_value=both
+
+[job3]
+write_iops_log=log
+log_offset=1
+log_prio=1
+
+[job4]
+write_iops_log=log
+log_avg_msec=20
+log_window_value=both
+log_offset=1
diff --git a/t/run-fio-tests.py b/t/run-fio-tests.py
index 22580613..8bc343cd 100755
--- a/t/run-fio-tests.py
+++ b/t/run-fio-tests.py
@@ -47,6 +47,7 @@ import time
import shutil
import logging
import argparse
+import re
from pathlib import Path
from statsmodels.sandbox.stats.runs import runstest_1samp
from fiotestlib import FioExeTest, FioJobFileTest, run_fio_tests
@@ -553,6 +554,30 @@ class FioJobFileTest_t0029(FioJobFileTest):
if self.json_data['jobs'][1]['read']['io_kbytes'] != 8:
self.passed = False
+class FioJobFileTest_t0033(FioJobFileTest):
+ """Test log file format"""
+ def check_result(self):
+ super().check_result()
+
+ if not self.passed:
+ return
+
+ patterns = {
+ 'log_bw.1.log': '\\d+, \\d+, \\d+, \\d+, 0x[\\da-f]+\\n',
+ 'log_clat.2.log': '\\d+, \\d+, \\d+, \\d+, 0, \\d+\\n',
+ 'log_iops.3.log': '\\d+, \\d+, \\d+, \\d+, \\d+, 0x[\\da-f]+\\n',
+ 'log_iops.4.log': '\\d+, \\d+, \\d+, \\d+, 0, 0, \\d+\\n',
+ }
+
+ for logfile in patterns.keys():
+ file_path = os.path.join(self.paths['test_dir'], logfile)
+ with open(file_path, "r") as f:
+ line = f.readline()
+ if not re.match(patterns[logfile], line):
+ self.passed = False
+ self.failure_reason = "wrong log file format: " + logfile
+ return
+
class FioJobFileTest_iops_rate(FioJobFileTest):
"""Test consists of fio test job t0011
Confirm that job0 iops == 1000
@@ -878,6 +903,16 @@ TEST_LIST = [
'pre_success': SUCCESS_DEFAULT,
'requirements': [Requirements.linux, Requirements.libaio],
},
+ {
+ 'test_id': 33,
+ 'test_class': FioJobFileTest_t0033,
+ 'job': 't0033.fio',
+ 'success': SUCCESS_DEFAULT,
+ 'pre_job': None,
+ 'pre_success': None,
+ 'pre_success': SUCCESS_DEFAULT,
+ 'requirements': [],
+ },
{
'test_id': 1000,
'test_class': FioExeTest,
--
2.45.2
^ permalink raw reply related [flat|nested] 25+ messages in thread* Re: [PATCH 8/9] t/jobs/t0033: add test for the log file format
2024-08-27 7:08 ` [PATCH 8/9] t/jobs/t0033: add test for the log file format Shin'ichiro Kawasaki
@ 2024-08-28 18:46 ` Vincent Fu
2024-08-29 7:04 ` Shinichiro Kawasaki
0 siblings, 1 reply; 25+ messages in thread
From: Vincent Fu @ 2024-08-28 18:46 UTC (permalink / raw)
To: Shin'ichiro Kawasaki, fio, Jens Axboe; +Cc: Damien Le Moal
On 8/27/24 03:08, Shin'ichiro Kawasaki wrote:
> Add a test to check the log file format which is described in the "Log
> File Format" section in HOWTO.rst. Generate log files using combination
> of options relevant to log files, and check that lines in log files have
> the format expected. This test helps to confirm that the changes in the
> log file related functions do not cause regressions.
>
> Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
> ---
> t/jobs/t0033.fio | 28 ++++++++++++++++++++++++++++
> t/run-fio-tests.py | 35 +++++++++++++++++++++++++++++++++++
> 2 files changed, 63 insertions(+)
> create mode 100644 t/jobs/t0033.fio
>
> diff --git a/t/jobs/t0033.fio b/t/jobs/t0033.fio
> new file mode 100644
> index 00000000..038e388f
> --- /dev/null
> +++ b/t/jobs/t0033.fio
> @@ -0,0 +1,28 @@
> +[global]
> +rw=read
> +filename=t0033file
> +size=8k
> +time_based
> +runtime=100ms
> +ioengine=libaio
> +iodepth=1
> +
> +[job1]
> +write_bw_log=log
> +log_prio=1
> +
> +[job2]
> +write_lat_log=log
> +log_avg_msec=20
> +log_window_value=both
> +
> +[job3]
> +write_iops_log=log
> +log_offset=1
> +log_prio=1
> +
> +[job4]
> +write_iops_log=log
> +log_avg_msec=20
> +log_window_value=both
> +log_offset=1
> diff --git a/t/run-fio-tests.py b/t/run-fio-tests.py
> index 22580613..8bc343cd 100755
> --- a/t/run-fio-tests.py
> +++ b/t/run-fio-tests.py
> @@ -47,6 +47,7 @@ import time
> import shutil
> import logging
> import argparse
> +import re
> from pathlib import Path
> from statsmodels.sandbox.stats.runs import runstest_1samp
> from fiotestlib import FioExeTest, FioJobFileTest, run_fio_tests
> @@ -553,6 +554,30 @@ class FioJobFileTest_t0029(FioJobFileTest):
> if self.json_data['jobs'][1]['read']['io_kbytes'] != 8:
> self.passed = False
>
> +class FioJobFileTest_t0033(FioJobFileTest):
> + """Test log file format"""
> + def check_result(self):
> + super().check_result()
> +
> + if not self.passed:
> + return
> +
> + patterns = {
> + 'log_bw.1.log': '\\d+, \\d+, \\d+, \\d+, 0x[\\da-f]+\\n',
> + 'log_clat.2.log': '\\d+, \\d+, \\d+, \\d+, 0, \\d+\\n',
> + 'log_iops.3.log': '\\d+, \\d+, \\d+, \\d+, \\d+, 0x[\\da-f]+\\n',
> + 'log_iops.4.log': '\\d+, \\d+, \\d+, \\d+, 0, 0, \\d+\\n',
> + }
> +
> + for logfile in patterns.keys():
> + file_path = os.path.join(self.paths['test_dir'], logfile)
> + with open(file_path, "r") as f:
> + line = f.readline()
> + if not re.match(patterns[logfile], line):
> + self.passed = False
> + self.failure_reason = "wrong log file format: " + logfile
> + return
> +
> class FioJobFileTest_iops_rate(FioJobFileTest):
> """Test consists of fio test job t0011
> Confirm that job0 iops == 1000
> @@ -878,6 +903,16 @@ TEST_LIST = [
> 'pre_success': SUCCESS_DEFAULT,
> 'requirements': [Requirements.linux, Requirements.libaio],
> },
> + {
> + 'test_id': 33,
> + 'test_class': FioJobFileTest_t0033,
> + 'job': 't0033.fio',
> + 'success': SUCCESS_DEFAULT,
> + 'pre_job': None,
> + 'pre_success': None,
> + 'pre_success': SUCCESS_DEFAULT,
> + 'requirements': [],
Add [Requirements.linux, Requirements.libaio] here so that the script
does not try to run this test on platforms where it is not supported.
There were also some test failures where the job was able to run:
https://github.com/fiotestbot/fio/actions/runs/10573766613
Consider increasing the job duration and making log_avg_msec larger.
^ permalink raw reply [flat|nested] 25+ messages in thread* Re: [PATCH 8/9] t/jobs/t0033: add test for the log file format
2024-08-28 18:46 ` Vincent Fu
@ 2024-08-29 7:04 ` Shinichiro Kawasaki
0 siblings, 0 replies; 25+ messages in thread
From: Shinichiro Kawasaki @ 2024-08-29 7:04 UTC (permalink / raw)
To: Vincent Fu; +Cc: fio@vger.kernel.org, Jens Axboe, Damien Le Moal
On Aug 28, 2024 / 14:46, Vincent Fu wrote:
[...]
> Add [Requirements.linux, Requirements.libaio] here so that the script does
> not try to run this test on platforms where it is not supported.
Thanks, will do so in v2 for both test cases 0033 and 0034.
>
> There were also some test failures where the job was able to run:
>
> https://github.com/fiotestbot/fio/actions/runs/10573766613
>
> Consider increasing the job duration and making log_avg_msec larger.
I will increase the job duration from 100ms to 2s, and the log_avg_msec from
20ms to 100ms. I hope this is enough to avoid the failure.
^ permalink raw reply [flat|nested] 25+ messages in thread
* [PATCH 9/9] t/jobs/t0034: add test for the log_issue_time option
2024-08-27 7:08 [PATCH 0/9] introduce the log_issue_time option Shin'ichiro Kawasaki
` (7 preceding siblings ...)
2024-08-27 7:08 ` [PATCH 8/9] t/jobs/t0033: add test for the log file format Shin'ichiro Kawasaki
@ 2024-08-27 7:08 ` Shin'ichiro Kawasaki
2024-08-28 18:47 ` Vincent Fu
2024-08-28 17:06 ` [PATCH 0/9] introduce " Jens Axboe
9 siblings, 1 reply; 25+ messages in thread
From: Shin'ichiro Kawasaki @ 2024-08-27 7:08 UTC (permalink / raw)
To: fio, Jens Axboe, Vincent Fu; +Cc: Damien Le Moal, Shin'ichiro Kawasaki
Add a test to check the newly added option 'log_issue_time'. Generate
log files using the option and check that lines in the log files have
the format described in the "Log File Format" section in HOWTO.rst.
This test case has the logic same as t0033 except the log file names and
matching patterns. Factor out the logic to the new class
FioJobFileTest_LogFileFormat.
Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
---
t/jobs/t0034.fio | 27 ++++++++++++++++++++++++
t/run-fio-tests.py | 51 +++++++++++++++++++++++++++++++++++++---------
2 files changed, 68 insertions(+), 10 deletions(-)
create mode 100644 t/jobs/t0034.fio
diff --git a/t/jobs/t0034.fio b/t/jobs/t0034.fio
new file mode 100644
index 00000000..ca71b775
--- /dev/null
+++ b/t/jobs/t0034.fio
@@ -0,0 +1,27 @@
+[global]
+rw=read
+filename=t0034file
+size=8k
+time_based
+runtime=100ms
+ioengine=libaio
+iodepth=1
+
+[job1]
+write_lat_log=log
+log_offset=1
+log_issue_time=1
+
+[job2]
+write_lat_log=log
+log_offset=1
+log_issue_time=1
+log_avg_msec=20
+log_window_value=both
+
+[job3]
+write_lat_log=log
+write_bw_log=log
+write_iops_log=log
+log_offset=1
+log_issue_time=1
diff --git a/t/run-fio-tests.py b/t/run-fio-tests.py
index 8bc343cd..525f78ff 100755
--- a/t/run-fio-tests.py
+++ b/t/run-fio-tests.py
@@ -554,29 +554,50 @@ class FioJobFileTest_t0029(FioJobFileTest):
if self.json_data['jobs'][1]['read']['io_kbytes'] != 8:
self.passed = False
-class FioJobFileTest_t0033(FioJobFileTest):
+class FioJobFileTest_LogFileFormat(FioJobFileTest):
"""Test log file format"""
+ def setup(self, *args, **kws):
+ super().setup(*args, **kws)
+ self.patterns = {}
+
def check_result(self):
super().check_result()
if not self.passed:
return
- patterns = {
+ for logfile in self.patterns.keys():
+ file_path = os.path.join(self.paths['test_dir'], logfile)
+ with open(file_path, "r") as f:
+ line = f.readline()
+ if not re.match(self.patterns[logfile], line):
+ self.passed = False
+ self.failure_reason = "wrong log file format: " + logfile
+ return
+
+class FioJobFileTest_t0033(FioJobFileTest_LogFileFormat):
+ """Test log file format"""
+ def setup(self, *args, **kws):
+ super().setup(*args, **kws)
+ self.patterns = {
'log_bw.1.log': '\\d+, \\d+, \\d+, \\d+, 0x[\\da-f]+\\n',
'log_clat.2.log': '\\d+, \\d+, \\d+, \\d+, 0, \\d+\\n',
'log_iops.3.log': '\\d+, \\d+, \\d+, \\d+, \\d+, 0x[\\da-f]+\\n',
'log_iops.4.log': '\\d+, \\d+, \\d+, \\d+, 0, 0, \\d+\\n',
}
- for logfile in patterns.keys():
- file_path = os.path.join(self.paths['test_dir'], logfile)
- with open(file_path, "r") as f:
- line = f.readline()
- if not re.match(patterns[logfile], line):
- self.passed = False
- self.failure_reason = "wrong log file format: " + logfile
- return
+class FioJobFileTest_t0034(FioJobFileTest_LogFileFormat):
+ """Test log file format"""
+ def setup(self, *args, **kws):
+ super().setup(*args, **kws)
+ self.patterns = {
+ 'log_clat.1.log': '\\d+, \\d+, \\d+, \\d+, \\d+, \\d+, \\d+\\n',
+ 'log_slat.1.log': '\\d+, \\d+, \\d+, \\d+, \\d+, \\d+, \\d+\\n',
+ 'log_lat.1.log': '\\d+, \\d+, \\d+, \\d+, \\d+, \\d+, 0\\n',
+ 'log_clat.2.log': '\\d+, \\d+, \\d+, \\d+, 0, 0, \\d+, 0\\n',
+ 'log_bw.3.log': '\\d+, \\d+, \\d+, \\d+, \\d+, \\d+, 0\\n',
+ 'log_iops.3.log': '\\d+, \\d+, \\d+, \\d+, \\d+, \\d+, 0\\n',
+ }
class FioJobFileTest_iops_rate(FioJobFileTest):
"""Test consists of fio test job t0011
@@ -913,6 +934,16 @@ TEST_LIST = [
'pre_success': SUCCESS_DEFAULT,
'requirements': [],
},
+ {
+ 'test_id': 34,
+ 'test_class': FioJobFileTest_t0034,
+ 'job': 't0034.fio',
+ 'success': SUCCESS_DEFAULT,
+ 'pre_job': None,
+ 'pre_success': None,
+ 'pre_success': SUCCESS_DEFAULT,
+ 'requirements': [],
+ },
{
'test_id': 1000,
'test_class': FioExeTest,
--
2.45.2
^ permalink raw reply related [flat|nested] 25+ messages in thread* Re: [PATCH 9/9] t/jobs/t0034: add test for the log_issue_time option
2024-08-27 7:08 ` [PATCH 9/9] t/jobs/t0034: add test for the log_issue_time option Shin'ichiro Kawasaki
@ 2024-08-28 18:47 ` Vincent Fu
0 siblings, 0 replies; 25+ messages in thread
From: Vincent Fu @ 2024-08-28 18:47 UTC (permalink / raw)
To: Shin'ichiro Kawasaki, fio, Jens Axboe; +Cc: Damien Le Moal
On 8/27/24 03:08, Shin'ichiro Kawasaki wrote:
> Add a test to check the newly added option 'log_issue_time'. Generate
> log files using the option and check that lines in the log files have
> the format described in the "Log File Format" section in HOWTO.rst.
>
> This test case has the logic same as t0033 except the log file names and
> matching patterns. Factor out the logic to the new class
> FioJobFileTest_LogFileFormat.
>
> Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
> ---
> t/jobs/t0034.fio | 27 ++++++++++++++++++++++++
> t/run-fio-tests.py | 51 +++++++++++++++++++++++++++++++++++++---------
> 2 files changed, 68 insertions(+), 10 deletions(-)
> create mode 100644 t/jobs/t0034.fio
>
> diff --git a/t/jobs/t0034.fio b/t/jobs/t0034.fio
> new file mode 100644
> index 00000000..ca71b775
> --- /dev/null
> +++ b/t/jobs/t0034.fio
> @@ -0,0 +1,27 @@
> +[global]
> +rw=read
> +filename=t0034file
> +size=8k
> +time_based
> +runtime=100ms
> +ioengine=libaio
> +iodepth=1
> +
> +[job1]
> +write_lat_log=log
> +log_offset=1
> +log_issue_time=1
> +
> +[job2]
> +write_lat_log=log
> +log_offset=1
> +log_issue_time=1
> +log_avg_msec=20
> +log_window_value=both
> +
> +[job3]
> +write_lat_log=log
> +write_bw_log=log
> +write_iops_log=log
> +log_offset=1
> +log_issue_time=1
> diff --git a/t/run-fio-tests.py b/t/run-fio-tests.py
> index 8bc343cd..525f78ff 100755
> --- a/t/run-fio-tests.py
> +++ b/t/run-fio-tests.py
> @@ -554,29 +554,50 @@ class FioJobFileTest_t0029(FioJobFileTest):
> if self.json_data['jobs'][1]['read']['io_kbytes'] != 8:
> self.passed = False
>
> -class FioJobFileTest_t0033(FioJobFileTest):
> +class FioJobFileTest_LogFileFormat(FioJobFileTest):
> """Test log file format"""
> + def setup(self, *args, **kws):
> + super().setup(*args, **kws)
> + self.patterns = {}
> +
> def check_result(self):
> super().check_result()
>
> if not self.passed:
> return
>
> - patterns = {
> + for logfile in self.patterns.keys():
> + file_path = os.path.join(self.paths['test_dir'], logfile)
> + with open(file_path, "r") as f:
> + line = f.readline()
> + if not re.match(self.patterns[logfile], line):
> + self.passed = False
> + self.failure_reason = "wrong log file format: " + logfile
> + return
> +
> +class FioJobFileTest_t0033(FioJobFileTest_LogFileFormat):
> + """Test log file format"""
> + def setup(self, *args, **kws):
> + super().setup(*args, **kws)
> + self.patterns = {
> 'log_bw.1.log': '\\d+, \\d+, \\d+, \\d+, 0x[\\da-f]+\\n',
> 'log_clat.2.log': '\\d+, \\d+, \\d+, \\d+, 0, \\d+\\n',
> 'log_iops.3.log': '\\d+, \\d+, \\d+, \\d+, \\d+, 0x[\\da-f]+\\n',
> 'log_iops.4.log': '\\d+, \\d+, \\d+, \\d+, 0, 0, \\d+\\n',
> }
>
> - for logfile in patterns.keys():
> - file_path = os.path.join(self.paths['test_dir'], logfile)
> - with open(file_path, "r") as f:
> - line = f.readline()
> - if not re.match(patterns[logfile], line):
> - self.passed = False
> - self.failure_reason = "wrong log file format: " + logfile
> - return
> +class FioJobFileTest_t0034(FioJobFileTest_LogFileFormat):
> + """Test log file format"""
> + def setup(self, *args, **kws):
> + super().setup(*args, **kws)
> + self.patterns = {
> + 'log_clat.1.log': '\\d+, \\d+, \\d+, \\d+, \\d+, \\d+, \\d+\\n',
> + 'log_slat.1.log': '\\d+, \\d+, \\d+, \\d+, \\d+, \\d+, \\d+\\n',
> + 'log_lat.1.log': '\\d+, \\d+, \\d+, \\d+, \\d+, \\d+, 0\\n',
> + 'log_clat.2.log': '\\d+, \\d+, \\d+, \\d+, 0, 0, \\d+, 0\\n',
> + 'log_bw.3.log': '\\d+, \\d+, \\d+, \\d+, \\d+, \\d+, 0\\n',
> + 'log_iops.3.log': '\\d+, \\d+, \\d+, \\d+, \\d+, \\d+, 0\\n',
> + }
>
> class FioJobFileTest_iops_rate(FioJobFileTest):
> """Test consists of fio test job t0011
> @@ -913,6 +934,16 @@ TEST_LIST = [
> 'pre_success': SUCCESS_DEFAULT,
> 'requirements': [],
> },
> + {
> + 'test_id': 34,
> + 'test_class': FioJobFileTest_t0034,
> + 'job': 't0034.fio',
> + 'success': SUCCESS_DEFAULT,
> + 'pre_job': None,
> + 'pre_success': None,
> + 'pre_success': SUCCESS_DEFAULT,
> + 'requirements': [],
[Requirements.linux, Requirements.libaio] is needed here as well.
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 0/9] introduce the log_issue_time option
2024-08-27 7:08 [PATCH 0/9] introduce the log_issue_time option Shin'ichiro Kawasaki
` (8 preceding siblings ...)
2024-08-27 7:08 ` [PATCH 9/9] t/jobs/t0034: add test for the log_issue_time option Shin'ichiro Kawasaki
@ 2024-08-28 17:06 ` Jens Axboe
2024-08-29 0:59 ` Damien Le Moal
9 siblings, 1 reply; 25+ messages in thread
From: Jens Axboe @ 2024-08-28 17:06 UTC (permalink / raw)
To: Shin'ichiro Kawasaki, fio, Vincent Fu; +Cc: Damien Le Moal
On 8/27/24 1:08 AM, Shin'ichiro Kawasaki wrote:
> When write_lat_log option is set, fio outputs the 'clat' completion
> latency log file. This clat log can be used to analyze IO latency. This
> file can also be considered as an IO trace file as each IO entry can
> fully describe an IO when the --log_offset and --log_prio options are
> also used. However, using the clat log file as an IO trace is inaccurate
> because the time field of each entry uses millisecond units, which is
> too coarse for fast IOs (e.g. when using SSDs). This series proposes the
> new option 'log_issue_time' to address the inaccuracy problem. When this
> option is set, fio adds another field to the log file entries to log an
> IO issuing time in nanoseconds.
>
> The first four patches in the series clean up the current code as
> preparation. The 5th patch introduces the 'log_issue_time' option. The
> following two patches document the new option. The last two patches add
> two test cases to check the log file format.
In general this looks fine, and I think it's a useful feature. However,
should we have an option to make the generic time in the log entry be
in nsec as well?
--
Jens Axboe
^ permalink raw reply [flat|nested] 25+ messages in thread* Re: [PATCH 0/9] introduce the log_issue_time option
2024-08-28 17:06 ` [PATCH 0/9] introduce " Jens Axboe
@ 2024-08-29 0:59 ` Damien Le Moal
2024-08-29 1:49 ` Nick Neumann
2024-08-29 8:40 ` Shinichiro Kawasaki
0 siblings, 2 replies; 25+ messages in thread
From: Damien Le Moal @ 2024-08-29 0:59 UTC (permalink / raw)
To: Jens Axboe, Shin'ichiro Kawasaki, fio, Vincent Fu
On 8/29/24 02:06, Jens Axboe wrote:
> On 8/27/24 1:08 AM, Shin'ichiro Kawasaki wrote:
>> When write_lat_log option is set, fio outputs the 'clat' completion
>> latency log file. This clat log can be used to analyze IO latency. This
>> file can also be considered as an IO trace file as each IO entry can
>> fully describe an IO when the --log_offset and --log_prio options are
>> also used. However, using the clat log file as an IO trace is inaccurate
>> because the time field of each entry uses millisecond units, which is
>> too coarse for fast IOs (e.g. when using SSDs). This series proposes the
>> new option 'log_issue_time' to address the inaccuracy problem. When this
>> option is set, fio adds another field to the log file entries to log an
>> IO issuing time in nanoseconds.
>>
>> The first four patches in the series clean up the current code as
>> preparation. The 5th patch introduces the 'log_issue_time' option. The
>> following two patches document the new option. The last two patches add
>> two test cases to check the log file format.
>
> In general this looks fine, and I think it's a useful feature. However,
> should we have an option to make the generic time in the log entry be
> in nsec as well?
I am not sure if it is that useful given that the entry time field does not
directly correspond to any I/O time. But to be consistent with the other time
fields, I may be good to add this option. That is easy to do.
--
Damien Le Moal
Western Digital Research
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 0/9] introduce the log_issue_time option
2024-08-29 0:59 ` Damien Le Moal
@ 2024-08-29 1:49 ` Nick Neumann
2024-08-29 8:40 ` Shinichiro Kawasaki
1 sibling, 0 replies; 25+ messages in thread
From: Nick Neumann @ 2024-08-29 1:49 UTC (permalink / raw)
To: Damien Le Moal; +Cc: Jens Axboe, Shin'ichiro Kawasaki, fio, Vincent Fu
On Wed, Aug 28, 2024 at 7:59 PM Damien Le Moal <dlemoal@kernel.org> wrote:
> I am not sure if it is that useful given that the entry time field does not
> directly correspond to any I/O time. But to be consistent with the other time
> fields, I may be good to add this option. That is easy to do.
It could be nice. But I think there is also a discussion to be had
about just dropping the field (eventually) in a final single file,
every I/O file format, since the entry time field is fairly redundant
once issue time and latency are both in the same log. (Entry time =
issue time + latency + delay between io completion and logging)
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 0/9] introduce the log_issue_time option
2024-08-29 0:59 ` Damien Le Moal
2024-08-29 1:49 ` Nick Neumann
@ 2024-08-29 8:40 ` Shinichiro Kawasaki
1 sibling, 0 replies; 25+ messages in thread
From: Shinichiro Kawasaki @ 2024-08-29 8:40 UTC (permalink / raw)
To: Damien Le Moal; +Cc: Jens Axboe, fio@vger.kernel.org, Vincent Fu
On Aug 29, 2024 / 09:59, Damien Le Moal wrote:
> On 8/29/24 02:06, Jens Axboe wrote:
> > On 8/27/24 1:08 AM, Shin'ichiro Kawasaki wrote:
> >> When write_lat_log option is set, fio outputs the 'clat' completion
> >> latency log file. This clat log can be used to analyze IO latency. This
> >> file can also be considered as an IO trace file as each IO entry can
> >> fully describe an IO when the --log_offset and --log_prio options are
> >> also used. However, using the clat log file as an IO trace is inaccurate
> >> because the time field of each entry uses millisecond units, which is
> >> too coarse for fast IOs (e.g. when using SSDs). This series proposes the
> >> new option 'log_issue_time' to address the inaccuracy problem. When this
> >> option is set, fio adds another field to the log file entries to log an
> >> IO issuing time in nanoseconds.
> >>
> >> The first four patches in the series clean up the current code as
> >> preparation. The 5th patch introduces the 'log_issue_time' option. The
> >> following two patches document the new option. The last two patches add
> >> two test cases to check the log file format.
> >
> > In general this looks fine, and I think it's a useful feature. However,
> > should we have an option to make the generic time in the log entry be
> > in nsec as well?
>
> I am not sure if it is that useful given that the entry time field does not
> directly correspond to any I/O time.
I noted it in the commit message of the 5th patch, but did not in this cover
letter (sorry). Quote from the 5th patch commit message:
However, using the clat log file as an IO trace is inaccuate due to two
reasons. Firstly, the time field of each entry uses millisecond units,
which is too coarse for fast IOs (e.g. when using SSDs). Secondly, the
time field value is recorded not at command completion, but at log
sample recording. The time field value is slightly different from the IO
completion time. It can be used only as an approximated completion time.
That's why we propose to add the new field for the command issue time, instead
of the log entry in nsec.
> But to be consistent with the other time
> fields, I may be good to add this option. That is easy to do.
>
> --
> Damien Le Moal
> Western Digital Research
>
^ permalink raw reply [flat|nested] 25+ messages in thread