All of lore.kernel.org
 help / color / mirror / Atom feed
* [Ocfs2-devel] [PATCH 1/3 v2] ocfs2: Add ocfs2_trim_fs for SSD trim support.
  2011-03-07 10:02 [Ocfs2-devel] [PATCH 0/3] ocfs2: Add batched discard support Tao Ma
@ 2011-03-08 15:26 ` Tao Ma
  0 siblings, 0 replies; 9+ messages in thread
From: Tao Ma @ 2011-03-08 15:26 UTC (permalink / raw)
  To: ocfs2-devel

Changelog from v1 to v2:
1. Remove the checks for hard read-only and soft read-only.
2. Fix the bug found by Tristan.
3. If range->len is 0, return 0 instead of -EINVAL.
4. Allow minlen = 0 to go ahead instead of returning -EINVAL.

Regards,
Tao


From 9074413a619f1af32c03c5959d66ff465643496c Mon Sep 17 00:00:00 2001
From: Tao Ma <boyu.mt@taobao.com>
Date: Wed, 9 Mar 2011 07:12:10 +0800
Subject: [PATCH 1/3 v2] ocfs2: Add ocfs2_trim_fs for SSD trim support.

Add ocfs2_trim_fs to support trimming free clusters in the
volume. A range is given, and every run of free clusters that is
at least minlen long is discarded via the block layer.

Signed-off-by: Tao Ma <boyu.mt@taobao.com>
---
 fs/ocfs2/alloc.c |  156 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ocfs2/alloc.h |    1 +
 2 files changed, 157 insertions(+), 0 deletions(-)

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index b27a0d8..0ff46d9 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -29,6 +29,7 @@
 #include <linux/highmem.h>
 #include <linux/swap.h>
 #include <linux/quotaops.h>
+#include <linux/blkdev.h>
 
 #include <cluster/masklog.h>
 
@@ -7184,3 +7185,158 @@ out_commit:
 out:
 	return ret;
 }
+
+static int ocfs2_trim_extent(struct super_block *sb,
+			     struct ocfs2_group_desc *gd,
+			     int start, int count)
+{
+	u64 discard;
+
+	count = ocfs2_clusters_to_blocks(sb, count);
+	discard = le64_to_cpu(gd->bg_blkno) +
+			ocfs2_clusters_to_blocks(sb, start);
+
+	return sb_issue_discard(sb, discard, count, GFP_NOFS, 0);
+}
+
+static int ocfs2_trim_group(struct super_block *sb,
+			    struct ocfs2_group_desc *gd,
+			    int start, int max, int minbits)
+{
+	int ret = 0, count = 0, next;
+	void *bitmap = gd->bg_bitmap;
+
+	while (start < max) {
+		start = ocfs2_find_next_zero_bit(bitmap, max, start);
+		if (start >= max)
+			break;
+		next = ocfs2_find_next_bit(bitmap, max, start);
+
+		if ((next - start) >= minbits) {
+			ret = ocfs2_trim_extent(sb, gd,
+						start, next - start);
+			if (ret < 0) {
+				mlog_errno(ret);
+				break;
+			}
+			count += next - start;
+		}
+		start = next + 1;
+
+		if (fatal_signal_pending(current)) {
+			count = -ERESTARTSYS;
+			break;
+		}
+
+		if ((le16_to_cpu(gd->bg_free_bits_count) - count) < minbits)
+			break;
+	}
+
+	if (ret < 0)
+		count = ret;
+
+	return count;
+}
+
+int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
+{
+	struct ocfs2_super *osb = OCFS2_SB(sb);
+	u64 start, len, minlen, trimmed, first_group, last_group, group;
+	int ret, cnt, first_bit, last_bit;
+	struct buffer_head *main_bm_bh = NULL;
+	struct inode *main_bm_inode = NULL;
+	struct buffer_head *gd_bh = NULL;
+	struct ocfs2_dinode *main_bm;
+	struct ocfs2_group_desc *gd = NULL;
+
+	start = range->start >> osb->s_clustersize_bits;
+	len = range->len >> osb->s_clustersize_bits;
+	minlen = range->minlen >> osb->s_clustersize_bits;
+	trimmed = 0;
+
+	if (!len) {
+		range->len = 0;
+		return 0;
+	}
+
+	if (minlen >= osb->bitmap_cpg)
+		return -EINVAL;
+
+	main_bm_inode = ocfs2_get_system_file_inode(osb,
+						    GLOBAL_BITMAP_SYSTEM_INODE,
+						    OCFS2_INVALID_SLOT);
+	if (!main_bm_inode) {
+		ret = -EIO;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	mutex_lock(&main_bm_inode->i_mutex);
+
+	ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 0);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out_mutex;
+	}
+	main_bm = (struct ocfs2_dinode *)main_bm_bh->b_data;
+
+	if (start >= le32_to_cpu(main_bm->i_clusters)) {
+		ret = -EINVAL;
+		mlog_errno(ret);
+		goto out_unlock;
+	}
+
+	if (start + len > le32_to_cpu(main_bm->i_clusters))
+		len = le32_to_cpu(main_bm->i_clusters) - start;
+
+	/* Determine first and last group to examine based on start and len */
+	first_group = ocfs2_which_cluster_group(main_bm_inode, start);
+	if (first_group == osb->first_cluster_group_blkno)
+		first_bit = start;
+	else
+		first_bit = start - ocfs2_blocks_to_clusters(sb, first_group);
+	last_group = ocfs2_which_cluster_group(main_bm_inode, start + len - 1);
+	last_bit = osb->bitmap_cpg;
+
+	for (group = first_group; group <= last_group;) {
+		if (first_bit + len >= osb->bitmap_cpg)
+			last_bit = osb->bitmap_cpg;
+		else
+			last_bit = first_bit + len;
+
+		ret = ocfs2_read_group_descriptor(main_bm_inode,
+						  main_bm, group,
+						  &gd_bh);
+		if (ret < 0) {
+			mlog_errno(ret);
+			break;
+		}
+
+		gd = (struct ocfs2_group_desc *)gd_bh->b_data;
+		cnt = ocfs2_trim_group(sb, gd, first_bit, last_bit, minlen);
+		brelse(gd_bh);
+		gd_bh = NULL;
+		if (cnt < 0) {
+			ret = cnt;
+			mlog_errno(ret);
+			break;
+		}
+
+		trimmed += cnt;
+		len -= osb->bitmap_cpg - first_bit;
+		first_bit = 0;
+		if (group == osb->first_cluster_group_blkno)
+			group = ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg);
+		else
+			group += ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg);
+	}
+	range->len = trimmed * sb->s_blocksize;
+out_unlock:
+	ocfs2_inode_unlock(main_bm_inode, 0);
+	brelse(main_bm_bh);
+out_mutex:
+	mutex_unlock(&main_bm_inode->i_mutex);
+	iput(main_bm_inode);
+out:
+	return ret;
+}
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 3bd08a0..ca381c5 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -239,6 +239,7 @@ int ocfs2_find_leaf(struct ocfs2_caching_info *ci,
 		    struct buffer_head **leaf_bh);
 int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster);
 
+int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range);
 /*
  * Helper function to look at the # of clusters in an extent record.
  */
-- 
1.6.3.GIT

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [Ocfs2-devel] [PATCH 0/3 V2] ocfs2: Add batched discard support.
@ 2011-05-23  2:08 Tao Ma
  2011-05-23  2:36 ` [Ocfs2-devel] [PATCH 1/3 V2] ocfs2: Add ocfs2_trim_fs for SSD trim support Tao Ma
  0 siblings, 1 reply; 9+ messages in thread
From: Tao Ma @ 2011-05-23  2:08 UTC (permalink / raw)
  To: ocfs2-devel

Hi all,

Changelog from v1 to v2:
Integrated the review advice from Sunil.

	These are the patches that add batched discard support to ocfs2. I
have tested them with xfstests 251, and the test passed.

By the way, I have also run some tests against it (bonnie++, postmark, ffsb
and fs_mark), and there is no big difference before and after the discard.

Regards,
Tao

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Ocfs2-devel] [PATCH 1/3 V2] ocfs2: Add ocfs2_trim_fs for SSD trim support.
  2011-05-23  2:08 [Ocfs2-devel] [PATCH 0/3 V2] ocfs2: Add batched discard support Tao Ma
@ 2011-05-23  2:36 ` Tao Ma
  2011-05-23  2:36   ` [Ocfs2-devel] [PATCH 2/3 V2] ocfs2: Add FITRIM ioctl Tao Ma
                     ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Tao Ma @ 2011-05-23  2:36 UTC (permalink / raw)
  To: ocfs2-devel

From: Tao Ma <boyu.mt@taobao.com>

Add ocfs2_trim_fs to support trimming free clusters in the
volume. A range is given, and every run of free clusters that is
at least minlen long is discarded via the block layer.

Signed-off-by: Tao Ma <boyu.mt@taobao.com>
---
 fs/ocfs2/alloc.c |  159 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ocfs2/alloc.h |    1 +
 2 files changed, 160 insertions(+), 0 deletions(-)

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 48aa9c7..ae3ea78 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -29,6 +29,7 @@
 #include <linux/highmem.h>
 #include <linux/swap.h>
 #include <linux/quotaops.h>
+#include <linux/blkdev.h>
 
 #include <cluster/masklog.h>
 
@@ -7184,3 +7185,161 @@ out_commit:
 out:
 	return ret;
 }
+
+static int ocfs2_trim_extent(struct super_block *sb,
+			     struct ocfs2_group_desc *gd,
+			     u32 start, u32 count)
+{
+	u64 discard, bcount;
+
+	bcount = ocfs2_clusters_to_blocks(sb, count);
+	discard = le64_to_cpu(gd->bg_blkno) +
+			ocfs2_clusters_to_blocks(sb, start);
+
+	return sb_issue_discard(sb, discard, bcount, GFP_NOFS, 0);
+}
+
+static int ocfs2_trim_group(struct super_block *sb,
+			    struct ocfs2_group_desc *gd,
+			    u32 start, u32 max, u32 minbits)
+{
+	int ret = 0, count = 0, next;
+	void *bitmap = gd->bg_bitmap;
+
+	if (le16_to_cpu(gd->bg_free_bits_count) < minbits)
+		return 0;
+
+	while (start < max) {
+		start = ocfs2_find_next_zero_bit(bitmap, max, start);
+		if (start >= max)
+			break;
+		next = ocfs2_find_next_bit(bitmap, max, start);
+
+		if ((next - start) >= minbits) {
+			ret = ocfs2_trim_extent(sb, gd,
+						start, next - start);
+			if (ret < 0) {
+				mlog_errno(ret);
+				break;
+			}
+			count += next - start;
+		}
+		start = next + 1;
+
+		if (fatal_signal_pending(current)) {
+			count = -ERESTARTSYS;
+			break;
+		}
+
+		if ((le16_to_cpu(gd->bg_free_bits_count) - count) < minbits)
+			break;
+	}
+
+	if (ret < 0)
+		count = ret;
+
+	return count;
+}
+
+int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
+{
+	struct ocfs2_super *osb = OCFS2_SB(sb);
+	u64 start, len, trimmed, first_group, last_group, group;
+	int ret, cnt;
+	u32 first_bit, last_bit, minlen;
+	struct buffer_head *main_bm_bh = NULL;
+	struct inode *main_bm_inode = NULL;
+	struct buffer_head *gd_bh = NULL;
+	struct ocfs2_dinode *main_bm;
+	struct ocfs2_group_desc *gd = NULL;
+
+	start = range->start >> osb->s_clustersize_bits;
+	len = range->len >> osb->s_clustersize_bits;
+	minlen = range->minlen >> osb->s_clustersize_bits;
+	trimmed = 0;
+
+	if (!len) {
+		range->len = 0;
+		return 0;
+	}
+
+	if (minlen >= osb->bitmap_cpg)
+		return -EINVAL;
+
+	main_bm_inode = ocfs2_get_system_file_inode(osb,
+						    GLOBAL_BITMAP_SYSTEM_INODE,
+						    OCFS2_INVALID_SLOT);
+	if (!main_bm_inode) {
+		ret = -EIO;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	mutex_lock(&main_bm_inode->i_mutex);
+
+	ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 0);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out_mutex;
+	}
+	main_bm = (struct ocfs2_dinode *)main_bm_bh->b_data;
+
+	if (start >= le32_to_cpu(main_bm->i_clusters)) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	if (start + len > le32_to_cpu(main_bm->i_clusters))
+		len = le32_to_cpu(main_bm->i_clusters) - start;
+
+	/* Determine first and last group to examine based on start and len */
+	first_group = ocfs2_which_cluster_group(main_bm_inode, start);
+	if (first_group == osb->first_cluster_group_blkno)
+		first_bit = start;
+	else
+		first_bit = start - ocfs2_blocks_to_clusters(sb, first_group);
+	last_group = ocfs2_which_cluster_group(main_bm_inode, start + len - 1);
+	last_bit = osb->bitmap_cpg;
+
+	for (group = first_group; group <= last_group;) {
+		if (first_bit + len >= osb->bitmap_cpg)
+			last_bit = osb->bitmap_cpg;
+		else
+			last_bit = first_bit + len;
+
+		ret = ocfs2_read_group_descriptor(main_bm_inode,
+						  main_bm, group,
+						  &gd_bh);
+		if (ret < 0) {
+			mlog_errno(ret);
+			break;
+		}
+
+		gd = (struct ocfs2_group_desc *)gd_bh->b_data;
+		cnt = ocfs2_trim_group(sb, gd, first_bit, last_bit, minlen);
+		brelse(gd_bh);
+		gd_bh = NULL;
+		if (cnt < 0) {
+			ret = cnt;
+			mlog_errno(ret);
+			break;
+		}
+
+		trimmed += cnt;
+		len -= osb->bitmap_cpg - first_bit;
+		first_bit = 0;
+		if (group == osb->first_cluster_group_blkno)
+			group = ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg);
+		else
+			group += ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg);
+	}
+	range->len = trimmed * sb->s_blocksize;
+out_unlock:
+	ocfs2_inode_unlock(main_bm_inode, 0);
+	brelse(main_bm_bh);
+out_mutex:
+	mutex_unlock(&main_bm_inode->i_mutex);
+	iput(main_bm_inode);
+out:
+	return ret;
+}
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 3bd08a0..ca381c5 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -239,6 +239,7 @@ int ocfs2_find_leaf(struct ocfs2_caching_info *ci,
 		    struct buffer_head **leaf_bh);
 int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster);
 
+int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range);
 /*
  * Helper function to look at the # of clusters in an extent record.
  */
-- 
1.6.3.GIT

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [Ocfs2-devel] [PATCH 2/3 V2] ocfs2: Add FITRIM ioctl.
  2011-05-23  2:36 ` [Ocfs2-devel] [PATCH 1/3 V2] ocfs2: Add ocfs2_trim_fs for SSD trim support Tao Ma
@ 2011-05-23  2:36   ` Tao Ma
  2011-05-23 19:33     ` Sunil Mushran
  2011-05-23  2:36   ` [Ocfs2-devel] [PATCH 3/3 V3] ocfs2: Add trace event for trim Tao Ma
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 9+ messages in thread
From: Tao Ma @ 2011-05-23  2:36 UTC (permalink / raw)
  To: ocfs2-devel

From: Tao Ma <boyu.mt@taobao.com>

Add the corresponding ioctl function for FITRIM.

Signed-off-by: Tao Ma <boyu.mt@taobao.com>
---
 fs/ocfs2/ioctl.c |   24 ++++++++++++++++++++++++
 1 files changed, 24 insertions(+), 0 deletions(-)

diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 8f13c59..312a28f 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -542,6 +542,29 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 			return -EFAULT;
 
 		return ocfs2_info_handle(inode, &info, 0);
+	case FITRIM:
+	{
+		struct super_block *sb = inode->i_sb;
+		struct fstrim_range range;
+		int ret = 0;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (copy_from_user(&range, (struct fstrim_range *)arg,
+		    sizeof(range)))
+			return -EFAULT;
+
+		ret = ocfs2_trim_fs(sb, &range);
+		if (ret < 0)
+			return ret;
+
+		if (copy_to_user((struct fstrim_range *)arg, &range,
+		    sizeof(range)))
+			return -EFAULT;
+
+		return 0;
+	}
 	default:
 		return -ENOTTY;
 	}
@@ -569,6 +592,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 	case OCFS2_IOC_GROUP_EXTEND:
 	case OCFS2_IOC_GROUP_ADD:
 	case OCFS2_IOC_GROUP_ADD64:
+	case FITRIM:
 		break;
 	case OCFS2_IOC_REFLINK:
 		if (copy_from_user(&args, (struct reflink_arguments *)arg,
-- 
1.6.3.GIT

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [Ocfs2-devel] [PATCH 3/3 V3] ocfs2: Add trace event for trim.
  2011-05-23  2:36 ` [Ocfs2-devel] [PATCH 1/3 V2] ocfs2: Add ocfs2_trim_fs for SSD trim support Tao Ma
  2011-05-23  2:36   ` [Ocfs2-devel] [PATCH 2/3 V2] ocfs2: Add FITRIM ioctl Tao Ma
@ 2011-05-23  2:36   ` Tao Ma
  2011-05-23 19:39     ` Sunil Mushran
  2011-05-23 19:33   ` [Ocfs2-devel] [PATCH 1/3 V2] ocfs2: Add ocfs2_trim_fs for SSD trim support Sunil Mushran
  2011-05-24  6:57   ` Joel Becker
  3 siblings, 1 reply; 9+ messages in thread
From: Tao Ma @ 2011-05-23  2:36 UTC (permalink / raw)
  To: ocfs2-devel

From: Tao Ma <boyu.mt@taobao.com>

Add the corresponding trace event for trim.

Signed-off-by: Tao Ma <boyu.mt@taobao.com>
---
 fs/ocfs2/alloc.c       |    7 +++++++
 fs/ocfs2/ocfs2_trace.h |   25 +++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 0 deletions(-)

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index ae3ea78..ed553c6 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -7196,6 +7196,8 @@ static int ocfs2_trim_extent(struct super_block *sb,
 	discard = le64_to_cpu(gd->bg_blkno) +
 			ocfs2_clusters_to_blocks(sb, start);
 
+	trace_ocfs2_trim_extent(sb, (unsigned long long)discard, bcount);
+
 	return sb_issue_discard(sb, discard, bcount, GFP_NOFS, 0);
 }
 
@@ -7209,6 +7211,9 @@ static int ocfs2_trim_group(struct super_block *sb,
 	if (le16_to_cpu(gd->bg_free_bits_count) < minbits)
 		return 0;
 
+	trace_ocfs2_trim_group((unsigned long long)le64_to_cpu(gd->bg_blkno),
+			       start, max, minbits);
+
 	while (start < max) {
 		start = ocfs2_find_next_zero_bit(bitmap, max, start);
 		if (start >= max)
@@ -7292,6 +7297,8 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
 	if (start + len > le32_to_cpu(main_bm->i_clusters))
 		len = le32_to_cpu(main_bm->i_clusters) - start;
 
+	trace_ocfs2_trim_fs(start, len, minlen);
+
 	/* Determine first and last group to examine based on start and len */
 	first_group = ocfs2_which_cluster_group(main_bm_inode, start);
 	if (first_group == osb->first_cluster_group_blkno)
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h
index a1dae5b..3b481f4 100644
--- a/fs/ocfs2/ocfs2_trace.h
+++ b/fs/ocfs2/ocfs2_trace.h
@@ -688,6 +688,31 @@ TRACE_EVENT(ocfs2_cache_block_dealloc,
 		  __entry->blkno, __entry->bit)
 );
 
+TRACE_EVENT(ocfs2_trim_extent,
+	TP_PROTO(struct super_block *sb, unsigned long long blk,
+		 unsigned long long count),
+	TP_ARGS(sb, blk, count),
+	TP_STRUCT__entry(
+		__field(int, dev_major)
+		__field(int, dev_minor)
+		__field(unsigned long long, blk)
+		__field(__u64,	count)
+	),
+	TP_fast_assign(
+		__entry->dev_major = MAJOR(sb->s_dev);
+		__entry->dev_minor = MINOR(sb->s_dev);
+		__entry->blk = blk;
+		__entry->count = count;
+	),
+	TP_printk("%d %d %llu %llu",
+		  __entry->dev_major, __entry->dev_minor,
+		  __entry->blk, __entry->count)
+);
+
+DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_trim_group);
+
+DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_trim_fs);
+
 /* End of trace events for fs/ocfs2/alloc.c. */
 
 /* Trace events for fs/ocfs2/localalloc.c. */
-- 
1.6.3.GIT

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [Ocfs2-devel] [PATCH 1/3 V2] ocfs2: Add ocfs2_trim_fs for SSD trim support.
  2011-05-23  2:36 ` [Ocfs2-devel] [PATCH 1/3 V2] ocfs2: Add ocfs2_trim_fs for SSD trim support Tao Ma
  2011-05-23  2:36   ` [Ocfs2-devel] [PATCH 2/3 V2] ocfs2: Add FITRIM ioctl Tao Ma
  2011-05-23  2:36   ` [Ocfs2-devel] [PATCH 3/3 V3] ocfs2: Add trace event for trim Tao Ma
@ 2011-05-23 19:33   ` Sunil Mushran
  2011-05-24  6:57   ` Joel Becker
  3 siblings, 0 replies; 9+ messages in thread
From: Sunil Mushran @ 2011-05-23 19:33 UTC (permalink / raw)
  To: ocfs2-devel

Reviewed-by: Sunil Mushran<sunil.mushran@oracle.com>


On 05/22/2011 07:36 PM, Tao Ma wrote:
> From: Tao Ma<boyu.mt@taobao.com>
>
> Add ocfs2_trim_fs to support trimming freed clusters in the
> volume. A range will be given and all the freed clusters greater
> than minlen will be discarded to the block layer.
>
> Signed-off-by: Tao Ma<boyu.mt@taobao.com>
> ---
>   fs/ocfs2/alloc.c |  159 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>   fs/ocfs2/alloc.h |    1 +
>   2 files changed, 160 insertions(+), 0 deletions(-)
>
> diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
> index 48aa9c7..ae3ea78 100644
> --- a/fs/ocfs2/alloc.c
> +++ b/fs/ocfs2/alloc.c
> @@ -29,6 +29,7 @@
>   #include<linux/highmem.h>
>   #include<linux/swap.h>
>   #include<linux/quotaops.h>
> +#include<linux/blkdev.h>
>
>   #include<cluster/masklog.h>
>
> @@ -7184,3 +7185,161 @@ out_commit:
>   out:
>   	return ret;
>   }
> +
> +static int ocfs2_trim_extent(struct super_block *sb,
> +			     struct ocfs2_group_desc *gd,
> +			     u32 start, u32 count)
> +{
> +	u64 discard, bcount;
> +
> +	bcount = ocfs2_clusters_to_blocks(sb, count);
> +	discard = le64_to_cpu(gd->bg_blkno) +
> +			ocfs2_clusters_to_blocks(sb, start);
> +
> +	return sb_issue_discard(sb, discard, bcount, GFP_NOFS, 0);
> +}
> +
> +static int ocfs2_trim_group(struct super_block *sb,
> +			    struct ocfs2_group_desc *gd,
> +			    u32 start, u32 max, u32 minbits)
> +{
> +	int ret = 0, count = 0, next;
> +	void *bitmap = gd->bg_bitmap;
> +
> +	if (le16_to_cpu(gd->bg_free_bits_count)<  minbits)
> +		return 0;
> +
> +	while (start<  max) {
> +		start = ocfs2_find_next_zero_bit(bitmap, max, start);
> +		if (start>= max)
> +			break;
> +		next = ocfs2_find_next_bit(bitmap, max, start);
> +
> +		if ((next - start)>= minbits) {
> +			ret = ocfs2_trim_extent(sb, gd,
> +						start, next - start);
> +			if (ret<  0) {
> +				mlog_errno(ret);
> +				break;
> +			}
> +			count += next - start;
> +		}
> +		start = next + 1;
> +
> +		if (fatal_signal_pending(current)) {
> +			count = -ERESTARTSYS;
> +			break;
> +		}
> +
> +		if ((le16_to_cpu(gd->bg_free_bits_count) - count)<  minbits)
> +			break;
> +	}
> +
> +	if (ret<  0)
> +		count = ret;
> +
> +	return count;
> +}
> +
> +int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
> +{
> +	struct ocfs2_super *osb = OCFS2_SB(sb);
> +	u64 start, len, trimmed, first_group, last_group, group;
> +	int ret, cnt;
> +	u32 first_bit, last_bit, minlen;
> +	struct buffer_head *main_bm_bh = NULL;
> +	struct inode *main_bm_inode = NULL;
> +	struct buffer_head *gd_bh = NULL;
> +	struct ocfs2_dinode *main_bm;
> +	struct ocfs2_group_desc *gd = NULL;
> +
> +	start = range->start>>  osb->s_clustersize_bits;
> +	len = range->len>>  osb->s_clustersize_bits;
> +	minlen = range->minlen>>  osb->s_clustersize_bits;
> +	trimmed = 0;
> +
> +	if (!len) {
> +		range->len = 0;
> +		return 0;
> +	}
> +
> +	if (minlen>= osb->bitmap_cpg)
> +		return -EINVAL;
> +
> +	main_bm_inode = ocfs2_get_system_file_inode(osb,
> +						    GLOBAL_BITMAP_SYSTEM_INODE,
> +						    OCFS2_INVALID_SLOT);
> +	if (!main_bm_inode) {
> +		ret = -EIO;
> +		mlog_errno(ret);
> +		goto out;
> +	}
> +
> +	mutex_lock(&main_bm_inode->i_mutex);
> +
> +	ret = ocfs2_inode_lock(main_bm_inode,&main_bm_bh, 0);
> +	if (ret<  0) {
> +		mlog_errno(ret);
> +		goto out_mutex;
> +	}
> +	main_bm = (struct ocfs2_dinode *)main_bm_bh->b_data;
> +
> +	if (start>= le32_to_cpu(main_bm->i_clusters)) {
> +		ret = -EINVAL;
> +		goto out_unlock;
> +	}
> +
> +	if (start + len>  le32_to_cpu(main_bm->i_clusters))
> +		len = le32_to_cpu(main_bm->i_clusters) - start;
> +
> +	/* Determine first and last group to examine based on start and len */
> +	first_group = ocfs2_which_cluster_group(main_bm_inode, start);
> +	if (first_group == osb->first_cluster_group_blkno)
> +		first_bit = start;
> +	else
> +		first_bit = start - ocfs2_blocks_to_clusters(sb, first_group);
> +	last_group = ocfs2_which_cluster_group(main_bm_inode, start + len - 1);
> +	last_bit = osb->bitmap_cpg;
> +
> +	for (group = first_group; group<= last_group;) {
> +		if (first_bit + len>= osb->bitmap_cpg)
> +			last_bit = osb->bitmap_cpg;
> +		else
> +			last_bit = first_bit + len;
> +
> +		ret = ocfs2_read_group_descriptor(main_bm_inode,
> +						  main_bm, group,
> +						&gd_bh);
> +		if (ret<  0) {
> +			mlog_errno(ret);
> +			break;
> +		}
> +
> +		gd = (struct ocfs2_group_desc *)gd_bh->b_data;
> +		cnt = ocfs2_trim_group(sb, gd, first_bit, last_bit, minlen);
> +		brelse(gd_bh);
> +		gd_bh = NULL;
> +		if (cnt<  0) {
> +			ret = cnt;
> +			mlog_errno(ret);
> +			break;
> +		}
> +
> +		trimmed += cnt;
> +		len -= osb->bitmap_cpg - first_bit;
> +		first_bit = 0;
> +		if (group == osb->first_cluster_group_blkno)
> +			group = ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg);
> +		else
> +			group += ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg);
> +	}
> +	range->len = trimmed * sb->s_blocksize;
> +out_unlock:
> +	ocfs2_inode_unlock(main_bm_inode, 0);
> +	brelse(main_bm_bh);
> +out_mutex:
> +	mutex_unlock(&main_bm_inode->i_mutex);
> +	iput(main_bm_inode);
> +out:
> +	return ret;
> +}
> diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
> index 3bd08a0..ca381c5 100644
> --- a/fs/ocfs2/alloc.h
> +++ b/fs/ocfs2/alloc.h
> @@ -239,6 +239,7 @@ int ocfs2_find_leaf(struct ocfs2_caching_info *ci,
>   		    struct buffer_head **leaf_bh);
>   int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster);
>
> +int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range);
>   /*
>    * Helper function to look at the # of clusters in an extent record.
>    */

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Ocfs2-devel] [PATCH 2/3 V2] ocfs2: Add FITRIM ioctl.
  2011-05-23  2:36   ` [Ocfs2-devel] [PATCH 2/3 V2] ocfs2: Add FITRIM ioctl Tao Ma
@ 2011-05-23 19:33     ` Sunil Mushran
  0 siblings, 0 replies; 9+ messages in thread
From: Sunil Mushran @ 2011-05-23 19:33 UTC (permalink / raw)
  To: ocfs2-devel

Reviewed-by: Sunil Mushran<sunil.mushran@oracle.com>


On 05/22/2011 07:36 PM, Tao Ma wrote:
> From: Tao Ma<boyu.mt@taobao.com>
>
> Add the corresponding ioctl function for FITRIM.
>
> Signed-off-by: Tao Ma<boyu.mt@taobao.com>
> ---
>   fs/ocfs2/ioctl.c |   24 ++++++++++++++++++++++++
>   1 files changed, 24 insertions(+), 0 deletions(-)
>
> diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
> index 8f13c59..312a28f 100644
> --- a/fs/ocfs2/ioctl.c
> +++ b/fs/ocfs2/ioctl.c
> @@ -542,6 +542,29 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>   			return -EFAULT;
>
>   		return ocfs2_info_handle(inode,&info, 0);
> +	case FITRIM:
> +	{
> +		struct super_block *sb = inode->i_sb;
> +		struct fstrim_range range;
> +		int ret = 0;
> +
> +		if (!capable(CAP_SYS_ADMIN))
> +			return -EPERM;
> +
> +		if (copy_from_user(&range, (struct fstrim_range *)arg,
> +		    sizeof(range)))
> +			return -EFAULT;
> +
> +		ret = ocfs2_trim_fs(sb,&range);
> +		if (ret<  0)
> +			return ret;
> +
> +		if (copy_to_user((struct fstrim_range *)arg,&range,
> +		    sizeof(range)))
> +			return -EFAULT;
> +
> +		return 0;
> +	}
>   	default:
>   		return -ENOTTY;
>   	}
> @@ -569,6 +592,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
>   	case OCFS2_IOC_GROUP_EXTEND:
>   	case OCFS2_IOC_GROUP_ADD:
>   	case OCFS2_IOC_GROUP_ADD64:
> +	case FITRIM:
>   		break;
>   	case OCFS2_IOC_REFLINK:
>   		if (copy_from_user(&args, (struct reflink_arguments *)arg,

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Ocfs2-devel] [PATCH 3/3 V3] ocfs2: Add trace event for trim.
  2011-05-23  2:36   ` [Ocfs2-devel] [PATCH 3/3 V3] ocfs2: Add trace event for trim Tao Ma
@ 2011-05-23 19:39     ` Sunil Mushran
  0 siblings, 0 replies; 9+ messages in thread
From: Sunil Mushran @ 2011-05-23 19:39 UTC (permalink / raw)
  To: ocfs2-devel

Reviewed-by: Sunil Mushran<sunil.mushran@oracle.com>



On 05/22/2011 07:36 PM, Tao Ma wrote:
> From: Tao Ma<boyu.mt@taobao.com>
>
> Add the corresponding trace event for trim.
>
> Signed-off-by: Tao Ma<boyu.mt@taobao.com>
> ---
>   fs/ocfs2/alloc.c       |    7 +++++++
>   fs/ocfs2/ocfs2_trace.h |   25 +++++++++++++++++++++++++
>   2 files changed, 32 insertions(+), 0 deletions(-)
>
> diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
> index ae3ea78..ed553c6 100644
> --- a/fs/ocfs2/alloc.c
> +++ b/fs/ocfs2/alloc.c
> @@ -7196,6 +7196,8 @@ static int ocfs2_trim_extent(struct super_block *sb,
>   	discard = le64_to_cpu(gd->bg_blkno) +
>   			ocfs2_clusters_to_blocks(sb, start);
>
> +	trace_ocfs2_trim_extent(sb, (unsigned long long)discard, bcount);
> +
>   	return sb_issue_discard(sb, discard, bcount, GFP_NOFS, 0);
>   }
>
> @@ -7209,6 +7211,9 @@ static int ocfs2_trim_group(struct super_block *sb,
>   	if (le16_to_cpu(gd->bg_free_bits_count)<  minbits)
>   		return 0;
>
> +	trace_ocfs2_trim_group((unsigned long long)le64_to_cpu(gd->bg_blkno),
> +			       start, max, minbits);
> +
>   	while (start<  max) {
>   		start = ocfs2_find_next_zero_bit(bitmap, max, start);
>   		if (start>= max)
> @@ -7292,6 +7297,8 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
>   	if (start + len>  le32_to_cpu(main_bm->i_clusters))
>   		len = le32_to_cpu(main_bm->i_clusters) - start;
>
> +	trace_ocfs2_trim_fs(start, len, minlen);
> +
>   	/* Determine first and last group to examine based on start and len */
>   	first_group = ocfs2_which_cluster_group(main_bm_inode, start);
>   	if (first_group == osb->first_cluster_group_blkno)
> diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h
> index a1dae5b..3b481f4 100644
> --- a/fs/ocfs2/ocfs2_trace.h
> +++ b/fs/ocfs2/ocfs2_trace.h
> @@ -688,6 +688,31 @@ TRACE_EVENT(ocfs2_cache_block_dealloc,
>   		  __entry->blkno, __entry->bit)
>   );
>
> +TRACE_EVENT(ocfs2_trim_extent,
> +	TP_PROTO(struct super_block *sb, unsigned long long blk,
> +		 unsigned long long count),
> +	TP_ARGS(sb, blk, count),
> +	TP_STRUCT__entry(
> +		__field(int, dev_major)
> +		__field(int, dev_minor)
> +		__field(unsigned long long, blk)
> +		__field(__u64,	count)
> +	),
> +	TP_fast_assign(
> +		__entry->dev_major = MAJOR(sb->s_dev);
> +		__entry->dev_minor = MINOR(sb->s_dev);
> +		__entry->blk = blk;
> +		__entry->count = count;
> +	),
> +	TP_printk("%d %d %llu %llu",
> +		  __entry->dev_major, __entry->dev_minor,
> +		  __entry->blk, __entry->count)
> +);
> +
> +DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_trim_group);
> +
> +DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_trim_fs);
> +
>   /* End of trace events for fs/ocfs2/alloc.c. */
>
>   /* Trace events for fs/ocfs2/localalloc.c. */

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Ocfs2-devel] [PATCH 1/3 V2] ocfs2: Add ocfs2_trim_fs for SSD trim support.
  2011-05-23  2:36 ` [Ocfs2-devel] [PATCH 1/3 V2] ocfs2: Add ocfs2_trim_fs for SSD trim support Tao Ma
                     ` (2 preceding siblings ...)
  2011-05-23 19:33   ` [Ocfs2-devel] [PATCH 1/3 V2] ocfs2: Add ocfs2_trim_fs for SSD trim support Sunil Mushran
@ 2011-05-24  6:57   ` Joel Becker
  3 siblings, 0 replies; 9+ messages in thread
From: Joel Becker @ 2011-05-24  6:57 UTC (permalink / raw)
  To: ocfs2-devel

On Mon, May 23, 2011 at 10:36:43AM +0800, Tao Ma wrote:
> From: Tao Ma <boyu.mt@taobao.com>
> 
> Add ocfs2_trim_fs to support trimming freed clusters in the
> volume. A range will be given and all the freed clusters greater
> than minlen will be discarded to the block layer.
> 
> Signed-off-by: Tao Ma <boyu.mt@taobao.com>

	The TRIM patches are now in the merge-window branch of
ocfs2.git.

Joel

-- 

"In the room the women come and go
 Talking of Michaelangelo."

			http://www.jlbec.org/
			jlbec at evilplan.org

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2011-05-24  6:57 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-05-23  2:08 [Ocfs2-devel] [PATCH 0/3 V2] ocfs2: Add batched discard support Tao Ma
2011-05-23  2:36 ` [Ocfs2-devel] [PATCH 1/3 V2] ocfs2: Add ocfs2_trim_fs for SSD trim support Tao Ma
2011-05-23  2:36   ` [Ocfs2-devel] [PATCH 2/3 V2] ocfs2: Add FITRIM ioctl Tao Ma
2011-05-23 19:33     ` Sunil Mushran
2011-05-23  2:36   ` [Ocfs2-devel] [PATCH 3/3 V3] ocfs2: Add trace event for trim Tao Ma
2011-05-23 19:39     ` Sunil Mushran
2011-05-23 19:33   ` [Ocfs2-devel] [PATCH 1/3 V2] ocfs2: Add ocfs2_trim_fs for SSD trim support Sunil Mushran
2011-05-24  6:57   ` Joel Becker
  -- strict thread matches above, loose matches on Subject: below --
2011-03-07 10:02 [Ocfs2-devel] [PATCH 0/3] ocfs2: Add batched discard support Tao Ma
2011-03-08 15:26 ` [Ocfs2-devel] [PATCH 1/3 v2] ocfs2: Add ocfs2_trim_fs for SSD trim support Tao Ma

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.