Non-uniform randomness with drifting

All of lore.kernel.org
 help / color / mirror / Atom feed

* Non-uniform randomness with drifting
@ 2015-01-07 23:32 Jens Axboe
  2015-01-08 13:22 ` Mark Nelson
                   ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: Jens Axboe @ 2015-01-07 23:32 UTC (permalink / raw)
  To: fio@vger.kernel.org

[-- Attachment #1: Type: text/plain, Size: 1864 bytes --]

Hi,

If you boil it down, fio can basically do two types of random 
distributions (random_distribution=):

- Uniform, meaning we scatter evenly across the IO range.
- Or zipf/pareto, meaning that we have some notion of hotness of
   offsets that are hit more often than others.

zipf/pareto are often used to simulate real world access patterns, 
where, eg, 5% of the dataset is hit 95% of the time, and having a long 
tail of rarely accessed data.

Something that's bothered me for a while is that a zipf/pareto 
distribution remains static over the runtime of the job. Real world 
workloads would often see a shift in what appears hot/cold and what 
isn't. So the attached patch is a first crude attempt at implementing 
that, and I'm posting it here to solicit ideas on how best to express 
such a shift in access patterns. The patch attached defines the 
following options:

random_drift	none, meaning the current behavior (static)
		sudden, meaning a sudden shift in the hot data
		gradual, meaning a gradual shift in the hot data

random_drift_start_percentage	0..100%. For example, if set to 50%, the
		hot/cold distribution would remain static until 50% of
		data has been accessed.

random_drift_percentage		0..100% For example, if set to 10%, the
		hot/cold distribution would shift 10% of the total size
		for every 10% of the workload accessed.

I'm thinking that random_drift_percentage should be split in two, so 
that we could say "shift X percent every time Y percent of the data has 
been accessed". But apart from that, any input on this? I'm open to 
suggestions on how to improve this, I think it's a feature that people 
evaluating caching solutions would be interested in in using.

An example job file would contain:

random_distribution=zipf
random_drift=gradual
random_drift_start_percentage=50
random_drift_percentage=10

-- 
Jens Axboe


[-- Attachment #2: fio-drift.patch --]
[-- Type: text/x-patch, Size: 7629 bytes --]

diff --git a/file.h b/file.h
index f7a1eae14408..93f9ee737bcf 100644
--- a/file.h
+++ b/file.h
@@ -95,6 +95,8 @@ struct fio_file {
 	uint64_t first_write;
 	uint64_t last_write;
 
+	uint64_t io_done;
+
 	/*
 	 * For use by the io engine
 	 */
@@ -120,6 +122,8 @@ struct fio_file {
 	 * Used for zipf random distribution
 	 */
 	struct zipf_state zipf;
+	uint64_t drift_offset;
+	unsigned int last_drift_perc;
 
 	int references;
 	enum fio_file_flags flags;
diff --git a/fio.h b/fio.h
index be2f23aa9f76..b017d2e5926b 100644
--- a/fio.h
+++ b/fio.h
@@ -642,6 +642,12 @@ enum {
 	FIO_RAND_DIST_PARETO,
 };
 
+enum {
+	FIO_RAND_DRIFT_NONE	= 0,
+	FIO_RAND_DRIFT_GRADUAL,
+	FIO_RAND_DRIFT_SUDDEN,
+};
+
 #define FIO_DEF_ZIPF		1.1
 #define FIO_DEF_PARETO		0.2
 
diff --git a/io_u.c b/io_u.c
index 23a9e4ada729..49dfa3792eea 100644
--- a/io_u.c
+++ b/io_u.c
@@ -130,11 +130,50 @@ ret:
 	return 0;
 }
 
+static uint64_t drift_offset(struct thread_data *td, struct fio_file *f)
+{
+	struct thread_options *o = &td->o;
+	unsigned int io_perc;
+
+	if (o->random_drift == FIO_RAND_DRIFT_NONE)
+		return 0;
+
+	if (!f->io_done)
+		return 0;
+
+	io_perc = 100 * f->io_done / f->io_size;
+	if (io_perc < o->drift_start_perc)
+		return 0;
+
+	io_perc -= o->drift_start_perc;
+	if (!io_perc)
+		return 0;
+
+	if (o->random_drift == FIO_RAND_DRIFT_GRADUAL) {
+		if (io_perc == f->last_drift_perc)
+			return 0;
+
+		f->drift_offset = f->io_size * io_perc / 100;
+		f->last_drift_perc = io_perc;
+	} else if (o->random_drift == FIO_RAND_DRIFT_SUDDEN) {
+		unsigned int o_io_perc = io_perc;
+
+		io_perc -= f->last_drift_perc;
+		if (io_perc < o->drift_perc)
+			return 0;
+
+		f->drift_offset += f->io_size * o->drift_perc / 100;
+		f->last_drift_perc = o_io_perc;
+	}
+
+	return f->drift_offset;
+}
+
 static int __get_next_rand_offset_zipf(struct thread_data *td,
 				       struct fio_file *f, enum fio_ddir ddir,
 				       uint64_t *b)
 {
-	*b = zipf_next(&f->zipf);
+	*b = zipf_next(&f->zipf, drift_offset(td, f));
 	return 0;
 }
 
@@ -142,7 +181,7 @@ static int __get_next_rand_offset_pareto(struct thread_data *td,
 					 struct fio_file *f, enum fio_ddir ddir,
 					 uint64_t *b)
 {
-	*b = pareto_next(&f->zipf);
+	*b = pareto_next(&f->zipf, drift_offset(td, f));
 	return 0;
 }
 
@@ -1648,6 +1687,8 @@ static void io_completed(struct thread_data *td, struct io_u **io_u_ptr,
 
 		if (!(io_u->flags & IO_U_F_VER_LIST))
 			td->this_io_bytes[ddir] += bytes;
+		if (f)
+			f->io_done += bytes;
 
 		if (ddir == DDIR_WRITE) {
 			if (f) {
diff --git a/lib/zipf.c b/lib/zipf.c
index c691bc51a5a5..1bfbcee2a549 100644
--- a/lib/zipf.c
+++ b/lib/zipf.c
@@ -50,11 +50,12 @@ void zipf_init(struct zipf_state *zs, unsigned long nranges, double theta,
 	zipf_update(zs);
 }
 
-unsigned long long zipf_next(struct zipf_state *zs)
+unsigned long long zipf_next(struct zipf_state *zs, uint64_t offset)
 {
 	double alpha, eta, rand_uni, rand_z;
 	unsigned long long n = zs->nranges;
 	unsigned long long val;
+	uint64_t off = zs->rand_off + offset;
 
 	alpha = 1.0 / (1.0 - zs->theta);
 	eta = (1.0 - pow(2.0 / n, 1.0 - zs->theta)) / (1.0 - zs->zeta2 / zs->zetan);
@@ -69,7 +70,7 @@ unsigned long long zipf_next(struct zipf_state *zs)
 	else
 		val = 1 + (unsigned long long)(n * pow(eta*rand_uni - eta + 1.0, alpha));
 
-	return (__hash_u64(val - 1) + zs->rand_off) % zs->nranges;
+	return (__hash_u64(val - 1) + off) % zs->nranges;
 }
 
 void pareto_init(struct zipf_state *zs, unsigned long nranges, double h,
@@ -79,10 +80,11 @@ void pareto_init(struct zipf_state *zs, unsigned long nranges, double h,
 	zs->pareto_pow = log(h) / log(1.0 - h);
 }
 
-unsigned long long pareto_next(struct zipf_state *zs)
+unsigned long long pareto_next(struct zipf_state *zs, uint64_t offset)
 {
 	double rand = (double) __rand(&zs->rand) / (double) FRAND_MAX;
 	unsigned long long n = zs->nranges - 1;
+	uint64_t off = zs->rand_off + offset;
 
-	return (__hash_u64(n * pow(rand, zs->pareto_pow)) + zs->rand_off) % zs->nranges;
+	return (__hash_u64(n * pow(rand, zs->pareto_pow)) + off) % zs->nranges;
 }
diff --git a/lib/zipf.h b/lib/zipf.h
index f98ad8182883..43edcc5e78d2 100644
--- a/lib/zipf.h
+++ b/lib/zipf.h
@@ -15,9 +15,9 @@ struct zipf_state {
 };
 
 void zipf_init(struct zipf_state *zs, unsigned long nranges, double theta, unsigned int seed);
-unsigned long long zipf_next(struct zipf_state *zs);
+unsigned long long zipf_next(struct zipf_state *zs, uint64_t off);
 
 void pareto_init(struct zipf_state *zs, unsigned long nranges, double h, unsigned int seed);
-unsigned long long pareto_next(struct zipf_state *zs);
+unsigned long long pareto_next(struct zipf_state *zs, uint64_t off);
 
 #endif
diff --git a/options.c b/options.c
index ab6e399db520..d4ebef0a7ee1 100644
--- a/options.c
+++ b/options.c
@@ -1880,6 +1880,51 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
 		.group	= FIO_OPT_G_RANDOM,
 	},
 	{
+		.name	= "random_drift",
+		.type	= FIO_OPT_STR,
+		.off1	= td_var_offset(random_drift),
+		.help	= "Random offset drift type",
+		.def	= "none",
+		.posval	= {
+			  { .ival = "none",
+			    .oval = FIO_RAND_DRIFT_NONE,
+			    .help = "No drift",
+			  },
+			  { .ival = "gradual",
+			    .oval = FIO_RAND_DRIFT_GRADUAL,
+			    .help = "Gradual drift",
+			  },
+			  { .ival = "sudden",
+			    .oval = FIO_RAND_DRIFT_SUDDEN,
+			    .help = "Sudden drift",
+			  },
+		},
+		.category = FIO_OPT_C_IO,
+		.group	= FIO_OPT_G_RANDOM,
+	},
+	{
+		.name	= "random_drift_start_percentage",
+		.lname	= "Random drift start percentage",
+		.type	= FIO_OPT_INT,
+		.off1	= td_var_offset(drift_start_perc),
+		.help	= "Percentage of workload done before drifting",
+		.minval	= 0,
+		.maxval	= 100,
+		.category = FIO_OPT_C_IO,
+		.group	= FIO_OPT_G_INVALID,
+	},
+	{
+		.name	= "random_drift_percentage",
+		.lname	= "Random drift percentage",
+		.type	= FIO_OPT_INT,
+		.off1	= td_var_offset(drift_perc),
+		.help	= "Percentage of workload that is drifted",
+		.minval	= 0,
+		.maxval	= 100,
+		.category = FIO_OPT_C_IO,
+		.group	= FIO_OPT_G_INVALID,
+	},
+	{
 		.name	= "percentage_random",
 		.lname	= "Percentage Random",
 		.type	= FIO_OPT_INT,
diff --git a/t/genzipf.c b/t/genzipf.c
index c5f098c4c606..273fc62f5fbc 100644
--- a/t/genzipf.c
+++ b/t/genzipf.c
@@ -209,9 +209,9 @@ int main(int argc, char *argv[])
 		struct node *n;
 
 		if (dist_type == TYPE_ZIPF)
-			offset = zipf_next(&zs);
+			offset = zipf_next(&zs, 0);
 		else
-			offset = pareto_next(&zs);
+			offset = pareto_next(&zs, 0);
 
 		n = hash_lookup(offset);
 		if (n)
diff --git a/thread_options.h b/thread_options.h
index 611f8e7376fa..6f5451428051 100644
--- a/thread_options.h
+++ b/thread_options.h
@@ -127,6 +127,10 @@ struct thread_options {
 
 	unsigned int random_distribution;
 
+	unsigned int random_drift;
+	unsigned int drift_start_perc;
+	unsigned int drift_perc;
+
 	fio_fp64_t zipf_theta;
 	fio_fp64_t pareto_h;
 
@@ -353,7 +357,11 @@ struct thread_options_pack {
 	uint32_t bs_is_seq_rand;
 
 	uint32_t random_distribution;
-	uint32_t pad;
+
+	uint32_t random_drift;
+	uint32_t drift_start_perc;
+	uint32_t drift_perc;
+
 	fio_fp64_t zipf_theta;
 	fio_fp64_t pareto_h;
 
@@ -426,7 +434,7 @@ struct thread_options_pack {
 	uint64_t trim_backlog;
 	uint32_t clat_percentiles;
 	uint32_t percentile_precision;
-	uint32_t pad2;
+	uint32_t pad;
 	fio_fp64_t percentile_list[FIO_IO_U_LIST_MAX_LEN];
 
 	uint8_t read_iolog_file[FIO_TOP_STR_MAX];
@@ -482,7 +490,7 @@ struct thread_options_pack {
 
 	uint64_t latency_target;
 	uint64_t latency_window;
-	uint32_t pad3;
+	uint32_t pad2;
 	fio_fp64_t latency_percentile;
 } __attribute__((packed));
 

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: Non-uniform randomness with drifting
  2015-01-07 23:32 Non-uniform randomness with drifting Jens Axboe
@ 2015-01-08 13:22 ` Mark Nelson
  2015-01-08 15:02 ` Alireza Haghdoost
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 10+ messages in thread
From: Mark Nelson @ 2015-01-08 13:22 UTC (permalink / raw)
  To: Jens Axboe, fio@vger.kernel.org

On 01/07/2015 05:32 PM, Jens Axboe wrote:
> Hi,
>
> If you boil it down, fio can basically do two types of random
> distributions (random_distribution=):
>
> - Uniform, meaning we scatter evenly across the IO range.
> - Or zipf/pareto, meaning that we have some notion of hotness of
>    offsets that are hit more often than others.
>
> zipf/pareto are often used to simulate real world access patterns,
> where, eg, 5% of the dataset is hit 95% of the time, and having a long
> tail of rarely accessed data.
>
> Something that's bothered me for a while is that a zipf/pareto
> distribution remains static over the runtime of the job. Real world
> workloads would often see a shift in what appears hot/cold and what
> isn't. So the attached patch is a first crude attempt at implementing
> that, and I'm posting it here to solicit ideas on how best to express
> such a shift in access patterns. The patch attached defines the
> following options:
>
> random_drift    none, meaning the current behavior (static)
>          sudden, meaning a sudden shift in the hot data
>          gradual, meaning a gradual shift in the hot data
>
> random_drift_start_percentage    0..100%. For example, if set to 50%, the
>          hot/cold distribution would remain static until 50% of
>          data has been accessed.
>
> random_drift_percentage        0..100% For example, if set to 10%, the
>          hot/cold distribution would shift 10% of the total size
>          for every 10% of the workload accessed.
>
> I'm thinking that random_drift_percentage should be split in two, so
> that we could say "shift X percent every time Y percent of the data has
> been accessed". But apart from that, any input on this? I'm open to
> suggestions on how to improve this, I think it's a feature that people
> evaluating caching solutions would be interested in in using.
>
> An example job file would contain:
>
> random_distribution=zipf
> random_drift=gradual
> random_drift_start_percentage=50
> random_drift_percentage=10
>

This is fantastic Jens!  We use zipf for testing our cache tiering 
implementation in Ceph.  I suspect that you are absolutely right that a 
slowly shifting distribution would be more accurate (and probably slower 
for us sadly).  I don't think I really have anything to add as it seems 
like you've got the things I'd want covered.  Good Job!

Thanks,
Mark


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Non-uniform randomness with drifting
  2015-01-07 23:32 Non-uniform randomness with drifting Jens Axboe
  2015-01-08 13:22 ` Mark Nelson
@ 2015-01-08 15:02 ` Alireza Haghdoost
  2015-01-08 15:25   ` Jens Axboe
  2015-01-08 16:59 ` Elliott, Robert (Server Storage)
  2015-01-10  9:26 ` Andrey Kuzmin
  3 siblings, 1 reply; 10+ messages in thread
From: Alireza Haghdoost @ 2015-01-08 15:02 UTC (permalink / raw)
  To: Jens Axboe; +Cc: fio@vger.kernel.org

On Wed, Jan 7, 2015 at 5:32 PM, Jens Axboe <axboe@kernel.dk> wrote:
> An example job file would contain:
>
> random_distribution=zipf
> random_drift=gradual
> random_drift_start_percentage=50
> random_drift_percentage=10

Jens,

This is an interesting proposal. Just to make sure if I understand
your example correctly, in this example you are proposing Gradual
shift  in hot/cold Blocks after 50% of workload is generated. This
shift then would be 10% total distribution for every 10% of remaining
workload access. It that correct ?

If I understand this correctly, the workload randomness distribution
would change after drift. For example if I start with zipf:1.2
initially, I would have different distribution after the drift which
is hard to describe. Would it be still Zipf with what theta parameter
?

Having said that, It would be more interesting and practical if we can
set specific distribution parameter for the drifted workload phases.
For example, Would be nice to divide the workload into 4 phases, phase
1: workload start with zipf:1.2, phase 2: workload drift to zipf:1.4,
phase 3: workload drift to pareto, phase 4:workload drift to uniform
distribution.

With this approach, we have more control on the workload randomness
distribution parameters for each gradual drift. Moreover, it would be
more practical to characterize a real workload and extract
distribution parameters for these phases and then feed them to fio for
synthetic re-generation of such a workload.

Hope it helps.
--
Alireza

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Non-uniform randomness with drifting
  2015-01-08 15:02 ` Alireza Haghdoost
@ 2015-01-08 15:25   ` Jens Axboe
  2015-01-08 16:07     ` Alireza Haghdoost
  0 siblings, 1 reply; 10+ messages in thread
From: Jens Axboe @ 2015-01-08 15:25 UTC (permalink / raw)
  To: Alireza Haghdoost; +Cc: fio@vger.kernel.org

On 01/08/2015 08:02 AM, Alireza Haghdoost wrote:
> On Wed, Jan 7, 2015 at 5:32 PM, Jens Axboe <axboe@kernel.dk> wrote:
>> An example job file would contain:
>>
>> random_distribution=zipf
>> random_drift=gradual
>> random_drift_start_percentage=50
>> random_drift_percentage=10
>
> Jens,
>
> This is an interesting proposal. Just to make sure if I understand
> your example correctly, in this example you are proposing Gradual
> shift  in hot/cold Blocks after 50% of workload is generated. This
> shift then would be 10% total distribution for every 10% of remaining
> workload access. It that correct ?

That is correct.

> If I understand this correctly, the workload randomness distribution
> would change after drift. For example if I start with zipf:1.2
> initially, I would have different distribution after the drift which
> is hard to describe. Would it be still Zipf with what theta parameter
> ?

After the drift, the zipf distribution would still be 1.2, it would just 
be a different set of blocks in the bands. The way it's implemented, it 
basically just shifts the logical offset in the drift. An example - lets 
say we have set the zipf to have the following distribution, using a 
small range for ease of representation:

0..4	95% of hits
5..9	5% of hits
10..14	2%
15..19	1%
20..24	1%
25..29	0.5%

We'll use the drift parameters from above, so once we've done 50% of the 
workload, we'll drift 10%. Since N is 30 here, that's a drift of 3. When 
that 10% drift is done, the distribution will look like this:

27..29 and 0..1		95% of hits
2..6			5% of hits
7..11			2%
12..16			1%
17..21			1%
22..26			0.5%

In other words, the distribution is identical, it's just a different set 
of blocks in the range. Fio hashes the linear blocks, so it won't be 0 
as the hottest, 1 as the next hottest, etc. That's just for simplicity 
in this example.

If you graph the distribution from 0..N, after the shift, the graph 
would look the same. It would just be offset to the right, with the long 
tail wrapped around.

> Having said that, It would be more interesting and practical if we can
> set specific distribution parameter for the drifted workload phases.
> For example, Would be nice to divide the workload into 4 phases, phase
> 1: workload start with zipf:1.2, phase 2: workload drift to zipf:1.4,
> phase 3: workload drift to pareto, phase 4:workload drift to uniform
> distribution.

I would not be adverse to drifting the zipf or pareto values, but I 
think it's orthogonal to this issue. You could imagine workloads where 
that is all you drift, or workloads where you both drift the LBA space 
and the zipf theta, for instance. Drifting between different 
distribution types (from zipf to pareto, or from pareto to uniform) is 
likely never going to be implemented, however.

> With this approach, we have more control on the workload randomness
> distribution parameters for each gradual drift. Moreover, it would be
> more practical to characterize a real workload and extract
> distribution parameters for these phases and then feed them to fio for
> synthetic re-generation of such a workload.

Definitely, the better you understand the workload, the better you can 
model. The above description is a linear (or sudden) drift of hotness, 
and an easy implementation of zipf theta drift would also be linear. 
Since we do have support for evaluating math, it would not be impossible 
to support having a functional description of the drift. But I think we 
should just keep it simple and support linear shifts.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Non-uniform randomness with drifting
  2015-01-08 15:25   ` Jens Axboe
@ 2015-01-08 16:07     ` Alireza Haghdoost
  2015-01-08 16:44       ` Jens Axboe
  0 siblings, 1 reply; 10+ messages in thread
From: Alireza Haghdoost @ 2015-01-08 16:07 UTC (permalink / raw)
  To: Jens Axboe; +Cc: fio@vger.kernel.org

> In other words, the distribution is identical, it's just a different set of
> blocks in the range. Fio hashes the linear blocks, so it won't be 0 as the
> hottest, 1 as the next hottest, etc. That's just for simplicity in this
> example.

Thanks for describing the idea in the second example. I get a sense of
what you proposing now. I am just now sure about the application of
such a workload. From the caching point of view, it does not really
matter which LBA ranges are in 95% hit range. Specially these days
that caches are all fully associative and based on key-value store.
That is my impression that might be wrong. I think am not convinced
that 95% hit on 0-4 LBA range would have different caching behavior
compared with 27-29 and 0-1 range.

I agree with you that this LBA drift does not change zipf
distribution. But only if we look at certain portion of the workload.
For example, in the first portion of workload, it was a zipf:1.2 and
95% hit on 0-4 range, in the second phase it is still zipf:1.2 with
95% hit on the other range. Therefore, if we look at the workload as a
whole not just a portion of the workload, it would be a zipf that
receive less than 95% hit on the 0-4 range because the hot range has
been drifted in the second portion of the workload. Therefore, the
workload as a whole does not maintain the original zipf:1.2
distribution since original 95% hit on 0-4 range has been distributed
to other LBA ranges.

>
> I would not be adverse to drifting the zipf or pareto values, but I think
> it's orthogonal to this issue. You could imagine workloads where that is all
> you drift, or workloads where you both drift the LBA space and the zipf
> theta, for instance. Drifting between different distribution types (from
> zipf to pareto, or from pareto to uniform) is likely never going to be
> implemented, however.

Would it be possible to define 4 workers and associate each one to a
certain distribution then execute them in a sequence ? For example,
worker 1 with zipf:1.2 start from beginning to 25% of workload, worker
2 with zipf:1.4 start from the 25% to 50% of workload time, worker 3
with pareto start from 50% to 75% of the workload time and finally
worker 4 with uniform distribution start from 75% to the end of
workload time.

My point is that for caching workload, change in hot LBA range is less
important that change in distribution of requests to hot LBAs. For
example, a VDI workload expose a great temporal locality in the
morning during boot storm, then its temporal locality would reduce
since all virtual desktops are running with different applications
during normal business hours. Finally the temporal locality would
reduce to zero or uniform distribution over the night since most of
the clients are turned off or hybernated.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Non-uniform randomness with drifting
  2015-01-08 16:07     ` Alireza Haghdoost
@ 2015-01-08 16:44       ` Jens Axboe
  0 siblings, 0 replies; 10+ messages in thread
From: Jens Axboe @ 2015-01-08 16:44 UTC (permalink / raw)
  To: Alireza Haghdoost; +Cc: fio@vger.kernel.org

On 01/08/2015 09:07 AM, Alireza Haghdoost wrote:
>> In other words, the distribution is identical, it's just a different set of
>> blocks in the range. Fio hashes the linear blocks, so it won't be 0 as the
>> hottest, 1 as the next hottest, etc. That's just for simplicity in this
>> example.
>
> Thanks for describing the idea in the second example. I get a sense of
> what you proposing now. I am just now sure about the application of
> such a workload. From the caching point of view, it does not really
> matter which LBA ranges are in 95% hit range. Specially these days
> that caches are all fully associative and based on key-value store.
> That is my impression that might be wrong. I think am not convinced
> that 95% hit on 0-4 LBA range would have different caching behavior
> compared with 27-29 and 0-1 range.

Lets take a classic example of having some slow big storage, with 5% of 
that capacity fronted by a much faster device. For that to be effective, 
you would assume that almost all the hot data access hits the faster 
caching device. If we drift the values that are accessed often, then we 
exercise the ability of the cache to adapt to the new working set.

> I agree with you that this LBA drift does not change zipf
> distribution. But only if we look at certain portion of the workload.
> For example, in the first portion of workload, it was a zipf:1.2 and
> 95% hit on 0-4 range, in the second phase it is still zipf:1.2 with
> 95% hit on the other range. Therefore, if we look at the workload as a
> whole not just a portion of the workload, it would be a zipf that
> receive less than 95% hit on the 0-4 range because the hot range has
> been drifted in the second portion of the workload. Therefore, the
> workload as a whole does not maintain the original zipf:1.2
> distribution since original 95% hit on 0-4 range has been distributed
> to other LBA ranges.

Yes, that is definitely true. Lets say we use the same 10% drift for 
each time period, t. And lets say we have drifted 10 times, through 
periods t1..t10. Graphing the access pattern for the entire period of 
t1..t10 would yield a flat equal distribution, and that surely isn't 
zipf:1.2. That is unavoidable with a drift like that. The point is that 
the distribution for the time period t1 would be zipf:1.2, and the 
distribution for t2 would also be zipf:1.2, they would just not be the 
same sets of data. The distribution of data only makes sense within the 
defined period of time, and that is also true of the performance seen. 
You can't drift too quickly, or they would be no point in doing so, you 
might as well just do uniformly random IO at that point.

>> I would not be adverse to drifting the zipf or pareto values, but I think
>> it's orthogonal to this issue. You could imagine workloads where that is all
>> you drift, or workloads where you both drift the LBA space and the zipf
>> theta, for instance. Drifting between different distribution types (from
>> zipf to pareto, or from pareto to uniform) is likely never going to be
>> implemented, however.
>
> Would it be possible to define 4 workers and associate each one to a
> certain distribution then execute them in a sequence ? For example,
> worker 1 with zipf:1.2 start from beginning to 25% of workload, worker
> 2 with zipf:1.4 start from the 25% to 50% of workload time, worker 3
> with pareto start from 50% to 75% of the workload time and finally
> worker 4 with uniform distribution start from 75% to the end of
> workload time.

Sure, you could do that right now, just define the 4 jobs with the 
desired settings, and have them execute serially by placing a stonewall 
between them.

> My point is that for caching workload, change in hot LBA range is less
> important that change in distribution of requests to hot LBAs. For

I don't think that is generally true. It's true for some types of 
caching workloads, like the VDI you describe below. If you're caching 
for a database workload, with the database holding store items or 
similar, a drift would more closely match. It's not quite perfect, but 
I'm not aiming for perfection here or we'd never get done. The drift 
expires everything in the hot end of the spectrum. For natural 
workloads, I would expect a decay of some of the hotter items, but some 
of them would likely persist over much longer intervals.

> example, a VDI workload expose a great temporal locality in the
> morning during boot storm, then its temporal locality would reduce
> since all virtual desktops are running with different applications
> during normal business hours. Finally the temporal locality would
> reduce to zero or uniform distribution over the night since most of
> the clients are turned off or hybernated.

A workload like that isn't really something you'd model with a drift. 
That's essentially three separate phases of the workload. Phase 1, boot 
storm, could be described with a zipf/pareto distribution, fairly 
closely. Phase 2 is probably more uniformly random, data set is a lot 
larger, though some locality would still be expected (I bet they all run 
Office, for instance, but they save/open different files). So perhaps 
phase 2 would work as zipf/pareto as well, just with a different input 
value. Phase 3 is basically idle, systems are off outside of the few sad 
souls burning the midnight oil.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: Non-uniform randomness with drifting
  2015-01-07 23:32 Non-uniform randomness with drifting Jens Axboe
  2015-01-08 13:22 ` Mark Nelson
  2015-01-08 15:02 ` Alireza Haghdoost
@ 2015-01-08 16:59 ` Elliott, Robert (Server Storage)
  2015-01-08 19:01   ` Alireza Haghdoost
  2015-01-10  9:26 ` Andrey Kuzmin
  3 siblings, 1 reply; 10+ messages in thread
From: Elliott, Robert (Server Storage) @ 2015-01-08 16:59 UTC (permalink / raw)
  To: Jens Axboe, fio@vger.kernel.org

> -----Original Message-----
> From: fio-owner@vger.kernel.org [mailto:fio-owner@vger.kernel.org] On
> Behalf Of Jens Axboe
> Sent: Wednesday, 07 January, 2015 5:33 PM
> To: fio@vger.kernel.org
> Subject: Non-uniform randomness with drifting
> 
> Hi,
> 
> If you boil it down, fio can basically do two types of random
> distributions (random_distribution=):
> 
> - Uniform, meaning we scatter evenly across the IO range.
> - Or zipf/pareto, meaning that we have some notion of hotness of
>    offsets that are hit more often than others.
> 
> zipf/pareto are often used to simulate real world access patterns,
> where, eg, 5% of the dataset is hit 95% of the time, and having a long
> tail of rarely accessed data.
> 

There are two dimensions of locality: spatial and temporal.

random_distribution= provides some spatial locality control. The
zipf distribution is not as "bursty" as many application workloads.

There is no direct temporal locality control.  rate= sets a maximum
rate, which essentially inserts some idle time (but the same for the
entire job).  

A 2002 paper by Mengzhi Wang, et al. at CMU discussed the importance of 
spatio-temporal correlation, at least in a storage trace provided by 
HP Labs.  They found "the intuition that requests arriving closely
in time tend to access nearby objects" held true and proposed a model
that incorporated that concept called PQRS.

2002 slides: http://www-cgi.cs.cmu.edu/~mzwang/research/pub/pqrs_slides.pdf
2002 paper: http://pdl.cmu.edu/PDL-FTP/Workload/performance02.pdf
2005 analysis slides: http://www.research.ibm.com/haifa/Workshops/systems-and-storage2005/papers/eitan_bachmat_pqrs.pdf
2005 analysis paper: http://www.cs.bgu.ac.il/~ebachmat/pqrs-14-4-2012.pdf

The new random_drift feature changes spatial locality over time (where
time means % of data that has been accessed), so it provides some
correlation control, but doesn't provide full temporal locality
control.

Maybe it'd be worth trying to implement the PQRS model and see if
that works well for both dimensions?

> I'm thinking that random_drift_percentage should be split in two, so
> that we could say "shift X percent every time Y percent of the data has
> been accessed". But apart from that, any input on this? I'm open to

If running on huge devices with time_based=1, it can take a long time
to reach a meaningful % of the total size.  % of time might be better.

---
Rob Elliott    HP Server Storage

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Non-uniform randomness with drifting
  2015-01-08 16:59 ` Elliott, Robert (Server Storage)
@ 2015-01-08 19:01   ` Alireza Haghdoost
  2015-01-08 20:14     ` Elliott, Robert (Server Storage)
  0 siblings, 1 reply; 10+ messages in thread
From: Alireza Haghdoost @ 2015-01-08 19:01 UTC (permalink / raw)
  To: Elliott, Robert (Server Storage); +Cc: Jens Axboe, fio@vger.kernel.org

On Thu, Jan 8, 2015 at 10:59 AM, Elliott, Robert (Server Storage)
<Elliott@hp.com> wrote:
> The new random_drift feature changes spatial locality over time (where
> time means % of data that has been accessed), so it provides some
> correlation control, but doesn't provide full temporal locality
> control.

Just to clarify, the temporal locality control that you are referring
to is more like burst control. You are not interested in a fixed IO
generation rate and interested to have dynamic IO rate changes. Is
that correct ?


^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: Non-uniform randomness with drifting
  2015-01-08 19:01   ` Alireza Haghdoost
@ 2015-01-08 20:14     ` Elliott, Robert (Server Storage)
  0 siblings, 0 replies; 10+ messages in thread
From: Elliott, Robert (Server Storage) @ 2015-01-08 20:14 UTC (permalink / raw)
  To: Alireza Haghdoost; +Cc: Jens Axboe, fio@vger.kernel.org

> -----Original Message-----
> From: Alireza Haghdoost [mailto:haghdoost@gmail.com]
> Sent: Thursday, 08 January, 2015 1:01 PM
> To: Elliott, Robert (Server Storage)
> Cc: Jens Axboe; fio@vger.kernel.org
> Subject: Re: Non-uniform randomness with drifting
> 
> On Thu, Jan 8, 2015 at 10:59 AM, Elliott, Robert (Server Storage)
> <Elliott@hp.com> wrote:
> > The new random_drift feature changes spatial locality over time (where
> > time means % of data that has been accessed), so it provides some
> > correlation control, but doesn't provide full temporal locality
> > control.
> 
> Just to clarify, the temporal locality control that you are referring
> to is more like burst control. You are not interested in a fixed IO
> generation rate and interested to have dynamic IO rate changes. Is 
> that correct ?

Yes. That kind of temporal locality is necessary for write buffer/caches
that sit between a fast interface and a slow interface, where the buffer
is continuously destaged.  Assuming the long-term input rate matches the
output/destage rate (otherwise the buffer will inevitably become full),
the required buffer size to maintain maximum bandwidth depends on the
length of the bursts between the idles.

A simple fixed rate generator has uses too, but it often leads to overloading
the system or settling at less than the maximum possible rate.  That's how 
the SPC-1 benchmarks are designed; find the heaviest workload for which the
system maintains the targeted latency (fio's latency_target= option).

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Non-uniform randomness with drifting
  2015-01-07 23:32 Non-uniform randomness with drifting Jens Axboe
                   ` (2 preceding siblings ...)
  2015-01-08 16:59 ` Elliott, Robert (Server Storage)
@ 2015-01-10  9:26 ` Andrey Kuzmin
  3 siblings, 0 replies; 10+ messages in thread
From: Andrey Kuzmin @ 2015-01-10  9:26 UTC (permalink / raw)
  To: Jens Axboe; +Cc: fio@vger.kernel.org

On Thu, Jan 8, 2015 at 2:32 AM, Jens Axboe <axboe@kernel.dk> wrote:
> Hi,
>
> If you boil it down, fio can basically do two types of random distributions
> (random_distribution=):
>
> - Uniform, meaning we scatter evenly across the IO range.
> - Or zipf/pareto, meaning that we have some notion of hotness of
>   offsets that are hit more often than others.
>
> zipf/pareto are often used to simulate real world access patterns, where,
> eg, 5% of the dataset is hit 95% of the time, and having a long tail of
> rarely accessed data.
>
> Something that's bothered me for a while is that a zipf/pareto distribution
> remains static over the runtime of the job. Real world workloads would often
> see a shift in what appears hot/cold and what isn't. So the attached patch
> is a first crude attempt at implementing that, and I'm posting it here to
> solicit ideas on how best to express such a shift in access patterns. The
> patch attached defines the following options:
>
> random_drift    none, meaning the current behavior (static)
>                 sudden, meaning a sudden shift in the hot data
>                 gradual, meaning a gradual shift in the hot data
>
> random_drift_start_percentage   0..100%. For example, if set to 50%, the
>                 hot/cold distribution would remain static until 50% of
>                 data has been accessed.
>
> random_drift_percentage         0..100% For example, if set to 10%, the
>                 hot/cold distribution would shift 10% of the total size
>                 for every 10% of the workload accessed.
>
> I'm thinking that random_drift_percentage should be split in two, so that we
> could say "shift X percent every time Y percent of the data has been
> accessed". But apart from that, any input on this? I'm open to suggestions
> on how to improve this, I think it's a feature that people evaluating
> caching solutions would be interested in in using.

Great start to me, with a lot of use cases out-of-the-box. A concern,
probably a minor one, is that drift is global and applies to all data
directions and threads, while one might be interested to model
read/write or per thread behavior differently in this regard.

Further down the line, I'd give a thought to turning the base of the
distribution in use into a (say, uniform) random variable.
start_percentage and percentage then become the moments of the
distribution that models the base offset evolution in time (expressed
in either units of time or data volume accessed). An obvious use case
is modelling multiple clients with zipf distribution, with each zipf's
base independently evolving in a random fashion (a realistic model of
a file server load with multiple clients, each with a randomly moving
hotspot).

Regards,
Andrey

>
> An example job file would contain:
>
> random_distribution=zipf
> random_drift=gradual
> random_drift_start_percentage=50
> random_drift_percentage=10
>
> --
> Jens Axboe
>


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2015-01-10  9:26 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-01-07 23:32 Non-uniform randomness with drifting Jens Axboe
2015-01-08 13:22 ` Mark Nelson
2015-01-08 15:02 ` Alireza Haghdoost
2015-01-08 15:25   ` Jens Axboe
2015-01-08 16:07     ` Alireza Haghdoost
2015-01-08 16:44       ` Jens Axboe
2015-01-08 16:59 ` Elliott, Robert (Server Storage)
2015-01-08 19:01   ` Alireza Haghdoost
2015-01-08 20:14     ` Elliott, Robert (Server Storage)
2015-01-10  9:26 ` Andrey Kuzmin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.