[PATCH] mke2fs: Inform user about ongoing discard

linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH] mke2fs: Inform user about ongoing discard
@ 2010-09-15 14:32 Lukas Czerner
  0 siblings, 0 replies; 4+ messages in thread
From: Lukas Czerner @ 2010-09-15 14:32 UTC (permalink / raw)
  To: linux-ext4; +Cc: tytso, sandeen, lczerner

Since there are some slow SSDs out there, and big thinly provisioned
storage devices on which it takes quite a long time to issue a discard
across the whole device, it would be nice to tell the user what is
going on and how long it will take (approximately).

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
---
 misc/mke2fs.c |   55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 55 insertions(+), 0 deletions(-)

diff --git a/misc/mke2fs.c b/misc/mke2fs.c
index add7c0c..4062507 100644
--- a/misc/mke2fs.c
+++ b/misc/mke2fs.c
@@ -1901,6 +1901,55 @@ static int mke2fs_setup_tdb(const char *name, io_manager *io_ptr)
 #define BLKDISCARD	_IO(0x12,119)
 #endif
 
+/* On how big chunk of disk do we test discard (in percentage) */
+#define DISCARD_TEST_RANGE_PERC		1
+#define DISCARD_TEST_RANGE(x, y)	\
+	((((x) / 100) *			\
+	DISCARD_TEST_RANGE_PERC +	\
+	(y)-1 / (y)) * (y))
+#define DISCARD_TEST_COUNT		5
+
+static int check_discard_support(int fd, int blocksize, __u64 blocks,
+				 double *time)
+{
+	int ret = 0, i;
+	double tmp_time = 0;
+	__uint64_t range[2];
+	struct timeval tv_start, tv_stop;
+
+	range[0] = 0;
+	range[1] = DISCARD_TEST_RANGE(blocks, blocksize);
+
+	for (i = 0; i < DISCARD_TEST_COUNT; i++) {
+
+		if (gettimeofday(&tv_start, (struct timezone *) NULL) == -1)
+			perror("gettimeofday");
+
+		ret = ioctl(fd, BLKDISCARD, &range);
+		if (ret)
+			return ret;
+
+		if (gettimeofday(&tv_stop, (struct timezone *) NULL) == -1) {
+			perror("gettimeofday");
+			return ret;
+		}
+
+		if (0 == i)
+			printf(_("Discarding all blocks on device "));
+
+		/* time diff */
+		tmp_time += ((double) tv_stop.tv_sec +
+			(((double) tv_stop.tv_usec) * 0.000001)) -
+			((double) tv_start.tv_sec +
+			(((double) tv_start.tv_usec) * 0.000001));
+	}
+
+	*time = (tmp_time / DISCARD_TEST_COUNT) *
+		(100 / DISCARD_TEST_RANGE_PERC);
+
+	return ret;
+}
+
 static void mke2fs_discard_blocks(ext2_filsys fs)
 {
 	int fd;
@@ -1908,6 +1957,7 @@ static void mke2fs_discard_blocks(ext2_filsys fs)
 	int blocksize;
 	__u64 blocks;
 	__uint64_t range[2];
+	double time = 0;
 
 	blocks = ext2fs_blocks_count(fs->super);
 	blocksize = EXT2_BLOCK_SIZE(fs->super);
@@ -1921,6 +1971,11 @@ static void mke2fs_discard_blocks(ext2_filsys fs)
 	 * optmization for SSDs or sparse storage.
 	 */
 	if (fd > 0) {
+		ret = check_discard_support(fd, blocksize, blocks, &time);
+		if (ret)
+			return;
+		printf(_("(Estimated time: %.2lfs).\n"), time);
+
 		ret = ioctl(fd, BLKDISCARD, &range);
 		if (verbose) {
 			printf(_("Calling BLKDISCARD from %llu to %llu "),
-- 
1.7.2.2


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH] mke2fs: Inform user about ongoing discard
@ 2010-10-21 14:23 Lukas Czerner
  2010-11-22 20:20 ` Ted Ts'o
  0 siblings, 1 reply; 4+ messages in thread
From: Lukas Czerner @ 2010-10-21 14:23 UTC (permalink / raw)
  To: linux-ext4; +Cc: tytso, sandeen, adilger, lczerner

Since there are some slow SSDs out there, and big thinly provisioned
storage devices on which it takes quite a long time to issue a discard
across the whole device, it would be nice to tell the user what is
going on and how long it will take (approximately).

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
---
 misc/mke2fs.c |   56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 56 insertions(+), 0 deletions(-)

diff --git a/misc/mke2fs.c b/misc/mke2fs.c
index 0980045..560df31 100644
--- a/misc/mke2fs.c
+++ b/misc/mke2fs.c
@@ -1907,6 +1907,56 @@ static int mke2fs_setup_tdb(const char *name, io_manager *io_ptr)
 #define BLKDISCARDZEROES _IO(0x12,124)
 #endif
 
+
+/* On how big chunk of disk do we test discard (in percentage) */
+#define DISCARD_TEST_RANGE_PERC		1
+#define DISCARD_TEST_RANGE(x, y)	\
+	((((x) / 100) *			\
+	DISCARD_TEST_RANGE_PERC +	\
+	(y)-1 / (y)) * (y))
+#define DISCARD_TEST_COUNT		5
+
+static int check_discard_support(int fd, int blocksize, __u64 blocks,
+				 double *time)
+{
+	int ret = 0, i;
+	double tmp_time = 0;
+	__uint64_t range[2];
+	struct timeval tv_start, tv_stop;
+
+	range[0] = 0;
+	range[1] = DISCARD_TEST_RANGE(blocks, blocksize);
+
+	for (i = 0; i < DISCARD_TEST_COUNT; i++) {
+
+		if (gettimeofday(&tv_start, (struct timezone *) NULL) == -1)
+			perror("gettimeofday");
+
+		ret = ioctl(fd, BLKDISCARD, &range);
+		if (ret)
+			return ret;
+
+		if (gettimeofday(&tv_stop, (struct timezone *) NULL) == -1) {
+			perror("gettimeofday");
+			return ret;
+		}
+
+		if (0 == i)
+			printf(_("Discarding all blocks on device "));
+
+		/* time diff */
+		tmp_time += ((double) tv_stop.tv_sec +
+			(((double) tv_stop.tv_usec) * 0.000001)) -
+			((double) tv_start.tv_sec +
+			(((double) tv_start.tv_usec) * 0.000001));
+	}
+
+	*time = (tmp_time / DISCARD_TEST_COUNT) *
+		(100 / DISCARD_TEST_RANGE_PERC);
+
+	return ret;
+}
+
 /*
  * Return zero if the discard succeeds, and -1 if the discard fails.
  */
@@ -1917,6 +1967,7 @@ static int mke2fs_discard_blocks(ext2_filsys fs)
 	int blocksize;
 	__u64 blocks;
 	__uint64_t range[2];
+	double time = 0;
 
 	blocks = ext2fs_blocks_count(fs->super);
 	blocksize = EXT2_BLOCK_SIZE(fs->super);
@@ -1926,6 +1977,11 @@ static int mke2fs_discard_blocks(ext2_filsys fs)
 	fd = open64(fs->device_name, O_RDWR);
 
 	if (fd > 0) {
+		ret = check_discard_support(fd, blocksize, blocks, &time);
+		if (ret)
+			return;
+		printf(_("(Estimated time: %.2lfs).\n"), time);
+
 		ret = ioctl(fd, BLKDISCARD, &range);
 		if (verbose) {
 			printf(_("Calling BLKDISCARD from %llu to %llu "),
-- 
1.7.2.3


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] mke2fs: Inform user about ongoing discard
  2010-10-21 14:23 Lukas Czerner
@ 2010-11-22 20:20 ` Ted Ts'o
  2010-11-23 17:48   ` Lukas Czerner
  0 siblings, 1 reply; 4+ messages in thread
From: Ted Ts'o @ 2010-11-22 20:20 UTC (permalink / raw)
  To: Lukas Czerner; +Cc: linux-ext4, sandeen, adilger

On Thu, Oct 21, 2010 at 04:23:02PM +0200, Lukas Czerner wrote:
> Since there are some slow SSD's out there and big thinly provisioned
> storages on which it takes quite long to issue discard through whole
> device, it would be nice to provide user the information about what is
> going on and how long it will take (approximately).
> 
> Signed-off-by: Lukas Czerner <lczerner@redhat.com>

Hi Lukas,

I've looked at this patch, and one thing that disturbs me about it ---
you are discarding the first percent of the disk five times
for no good reason just to get the timing, before then executing the
discard for the entire disk.   There are a couple of problems with this:

*) For smart/competently implemented SSDs, discarding the same part
of the disk five times might lead to a misleading timing --- the smart
device could easily determine that the first 1% is already not in use
after the first discard, and the subsequent 4 discards could be
discarded as no-ops.

*) Mark Lord has claimed that there exists a large number of
incompetently implemented SSDs out there, that may actually be
executing a flash erase of the discarded region.  If true, executing
an extra flash erase on 1% of the disk for no good reason five times
might not be the best thing to do for the longevity of the device.

I was tempted to fix this up myself, but since I'm trying to get
better at delegating work to others, may I suggest the following
changes?

1)  Implement block device ioctl's for the kernel that export the
discard_granularity, discard_alignment, and max_discard_sectors.  

2) Change mke2fs so that the discard is done in a separate function.
Said function should attempt to fetch the discard_granularity,
discard_alignment, and max_discard_sectors.

3) This new function in mke2fs should start by discarding
approximately 1% of device at a time, respecting discard_granularity
and discard_alignment.  If the time to discard 1% of the device is
less than a second, then it should double the amount that it discards
at a time.  If the time to discard takes longer than 4 seconds, it
should reduce the amount that it discards by half (again, always
respecting discard_granularity and discard_alignment).  The function
can display the amount of time elapsed and the estimated amount of
time remaining after each chunk of the device that it discards,
assuming it can use ^M to redraw the progress report (which of course
should be suppressed if the -q option is specified on the command
line).

This design doesn't "waste" any discards, which is both faster and
reduces wear on badly designed SSD's.  It also continuously updates
the user with the amount of time it takes to complete the discard
process.  It also will respect the discard_granularity and
discard_alignment restrictions; and of course, it allows the user to
interrupt the discard, without needing a special kernel patch.

Does this make sense to you?

						- Ted

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] mke2fs: Inform user about ongoing discard
  2010-11-22 20:20 ` Ted Ts'o
@ 2010-11-23 17:48   ` Lukas Czerner
  0 siblings, 0 replies; 4+ messages in thread
From: Lukas Czerner @ 2010-11-23 17:48 UTC (permalink / raw)
  To: Ted Ts'o; +Cc: Lukas Czerner, linux-ext4, sandeen, adilger

On Mon, 22 Nov 2010, Ted Ts'o wrote:

> On Thu, Oct 21, 2010 at 04:23:02PM +0200, Lukas Czerner wrote:
> > Since there are some slow SSD's out there and big thinly provisioned
> > storages on which it takes quite long to issue discard through whole
> > device, it would be nice to provide user the information about what is
> > going on and how long it will take (approximately).
> > 
> > Signed-off-by: Lukas Czerner <lczerner@redhat.com>
> 
> Hi Lukas,
> 
> I've looked at this patch, and one thing that disturbs me about it ---
> you are discarding the first percentage of the disk five percent times
> for no good reason just to get the timing, before then executing the
> discard for the entire disk.   There are a couple of problems with this:
> 
> *) For smart/competently implemented SSD's, discarding the same part
> of the disk five times might lead to a misleading timing --- the smart
> device could easily determine that the first 1% is already not in use
> after the first discard, and the subsequent 4 discards could be
> discard as no-ops.
> 
> *) Mark Lord has claimed that there exists a large number of
> incomptently implemented SSD's out there, that may actually be
> executing a flash erase of the discarded region.  If true, executing
> an extra flash erase on 1% of the disk for no good reason five times
> might not be the best thing to do for the longetivity of the device.
> 
> I was tempted to fix this up myself, but since I'm trying to get
> better at delegating work to others, may I suggest the following
> changes?
> 
> 1)  Implement block device ioctl's for the kernel that export the
> discard_granularity, discard_alignment, and max_discard_sectors.  
> 
> 2) Change mke2fs so that the discard is done in a separate function.
> Said function should attempt to fetch the discard_granularity,
> discard_alignment, and max_discard_sectors.
> 
> 3) This new function in mke2fs should start by discarding
> approximately 1% of device at a time, respecting discard_granularity
> and discard_alignment.  If the time to discard 1% of the device is
> less than a second, then it should double the amount that it discards
> at a time.  If the time to discard takes longer than 4 seconds, it
> should reduce the amount that it discards by half (again, always
> respecting discard_granularity and discard_alignment).  The function
> can display the amount of time elapsed and the estimated amount of
> time remaining after each chunk of the device that it discards,
> assuming it can use ^M to redraw the progress report (which of course
> should be suppressed if the -q option is specified on the command
> line).
> 
> 
> This design doesn't "waste" any discards, which is both faster and
> reduces wear on badly designed SSD's.  It also continuously updates
> the user with the amount of time it takes to complete the discard
> process.  It also will respect the discard_granularity and
> discard_alignment restrictions; and of course, it allows the user to
> interrupt the discard, without needing a special kernel patch.
> 
> Does this make sense to you?
> 
> 						- Ted
> 

Hi Ted,

this absolutely makes sense to me. I like the idea way better than what I
had done in my patch (actually I was probably too lazy to do it right in the
first place:)). So, I'll add this to my todo list and hopefully find
some time to deal with it ASAP.

Thanks for suggestions!

-Lukas


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2010-11-23 17:48 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-09-15 14:32 [PATCH] mke2fs: Inform user about ongoing discard Lukas Czerner
  -- strict thread matches above, loose matches on Subject: below --
2010-10-21 14:23 Lukas Czerner
2010-11-22 20:20 ` Ted Ts'o
2010-11-23 17:48   ` Lukas Czerner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).