linux-f2fs-devel.lists.sourceforge.net archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/3] f2fs: introduce cp_control structure
@ 2014-09-23  4:53 Jaegeuk Kim
  2014-09-23  4:53 ` [PATCH 2/3] f2fs: introduce FITRIM in f2fs_ioctl Jaegeuk Kim
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Jaegeuk Kim @ 2014-09-23  4:53 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

This patch adds a new data structure to control checkpoint parameters.
Currently, it carries the reason for the checkpoint, such as whether it is
triggered by an unmount (is_umount) or by a normal sync.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c        | 16 ++++++++--------
 fs/f2fs/f2fs.h              | 11 ++++++++++-
 fs/f2fs/gc.c                |  7 +++++--
 fs/f2fs/recovery.c          |  5 ++++-
 fs/f2fs/super.c             | 13 ++++++++++---
 include/trace/events/f2fs.h | 15 ++++++++++-----
 6 files changed, 47 insertions(+), 20 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index e519aaf..e401ffd 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -826,7 +826,7 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
 	finish_wait(&sbi->cp_wait, &wait);
 }
 
-static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
+static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 {
 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
@@ -894,7 +894,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 	ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
 			orphan_blocks);
 
-	if (is_umount) {
+	if (cpc->reason == CP_UMOUNT) {
 		set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
 		ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+
 				cp_payload_blks + data_sum_blocks +
@@ -948,7 +948,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 
 	write_data_summaries(sbi, start_blk);
 	start_blk += data_sum_blocks;
-	if (is_umount) {
+	if (cpc->reason == CP_UMOUNT) {
 		write_node_summaries(sbi, start_blk);
 		start_blk += NR_CURSEG_NODE_TYPE;
 	}
@@ -988,12 +988,12 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 /*
  * We guarantee that this checkpoint procedure will not fail.
  */
-void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
+void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 {
 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
 	unsigned long long ckpt_ver;
 
-	trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops");
+	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
 
 	mutex_lock(&sbi->cp_mutex);
 
@@ -1004,7 +1004,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 	if (block_operations(sbi))
 		goto out;
 
-	trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops");
+	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");
 
 	f2fs_submit_merged_bio(sbi, DATA, WRITE);
 	f2fs_submit_merged_bio(sbi, NODE, WRITE);
@@ -1023,13 +1023,13 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 	flush_sit_entries(sbi);
 
 	/* unlock all the fs_lock[] in do_checkpoint() */
-	do_checkpoint(sbi, is_umount);
+	do_checkpoint(sbi, cpc);
 
 	unblock_operations(sbi);
 	stat_inc_cp_count(sbi->stat_info);
 out:
 	mutex_unlock(&sbi->cp_mutex);
-	trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
+	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
 }
 
 void init_ino_entry_info(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 3b70b01..5298924 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -96,6 +96,15 @@ enum {
 	SIT_BITMAP
 };
 
+enum {
+	CP_UMOUNT,
+	CP_SYNC,
+};
+
+struct cp_control {
+	int reason;
+};
+
 /*
  * For CP/NAT/SIT/SSA readahead
  */
@@ -1314,7 +1323,7 @@ void update_dirty_page(struct inode *, struct page *);
 void add_dirty_dir_inode(struct inode *);
 void remove_dirty_dir_inode(struct inode *);
 void sync_dirty_dir_inodes(struct f2fs_sb_info *);
-void write_checkpoint(struct f2fs_sb_info *, bool);
+void write_checkpoint(struct f2fs_sb_info *, struct cp_control *);
 void init_ino_entry_info(struct f2fs_sb_info *);
 int __init create_checkpoint_caches(void);
 void destroy_checkpoint_caches(void);
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 7bf8392..e88fcf6 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -694,6 +694,9 @@ int f2fs_gc(struct f2fs_sb_info *sbi)
 	int gc_type = BG_GC;
 	int nfree = 0;
 	int ret = -1;
+	struct cp_control cpc = {
+		.reason = CP_SYNC,
+	};
 
 	INIT_LIST_HEAD(&ilist);
 gc_more:
@@ -704,7 +707,7 @@ gc_more:
 
 	if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) {
 		gc_type = FG_GC;
-		write_checkpoint(sbi, false);
+		write_checkpoint(sbi, &cpc);
 	}
 
 	if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE))
@@ -729,7 +732,7 @@ gc_more:
 		goto gc_more;
 
 	if (gc_type == FG_GC)
-		write_checkpoint(sbi, false);
+		write_checkpoint(sbi, &cpc);
 stop:
 	mutex_unlock(&sbi->gc_mutex);
 
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index a3fcea3..4189003 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -542,8 +542,11 @@ out:
 		set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
 		mutex_unlock(&sbi->cp_mutex);
 	} else if (need_writecp) {
+		struct cp_control cpc = {
+			.reason = CP_SYNC,
+		};
 		mutex_unlock(&sbi->cp_mutex);
-		write_checkpoint(sbi, false);
+		write_checkpoint(sbi, &cpc);
 	} else {
 		mutex_unlock(&sbi->cp_mutex);
 	}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 7a91a38..128c420 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -434,8 +434,12 @@ static void f2fs_put_super(struct super_block *sb)
 	stop_gc_thread(sbi);
 
 	/* We don't need to do checkpoint when it's clean */
-	if (sbi->s_dirty)
-		write_checkpoint(sbi, true);
+	if (sbi->s_dirty) {
+		struct cp_control cpc = {
+			.reason = CP_UMOUNT,
+		};
+		write_checkpoint(sbi, &cpc);
+	}
 
 	/*
 	 * normally superblock is clean, so we need to release this.
@@ -466,8 +470,11 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
 	trace_f2fs_sync_fs(sb, sync);
 
 	if (sync) {
+		struct cp_control cpc = {
+			.reason = CP_SYNC,
+		};
 		mutex_lock(&sbi->gc_mutex);
-		write_checkpoint(sbi, false);
+		write_checkpoint(sbi, &cpc);
 		mutex_unlock(&sbi->gc_mutex);
 	} else {
 		f2fs_balance_fs(sbi);
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index d06d443..66eaace 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -69,6 +69,11 @@
 		{ GC_GREEDY,	"Greedy" },				\
 		{ GC_CB,	"Cost-Benefit" })
 
+#define show_cpreason(type)						\
+	__print_symbolic(type,						\
+		{ CP_UMOUNT,	"Umount" },				\
+		{ CP_SYNC,	"Sync" })
+
 struct victim_sel_policy;
 
 DECLARE_EVENT_CLASS(f2fs__inode,
@@ -944,25 +949,25 @@ TRACE_EVENT(f2fs_submit_page_mbio,
 
 TRACE_EVENT(f2fs_write_checkpoint,
 
-	TP_PROTO(struct super_block *sb, bool is_umount, char *msg),
+	TP_PROTO(struct super_block *sb, int reason, char *msg),
 
-	TP_ARGS(sb, is_umount, msg),
+	TP_ARGS(sb, reason, msg),
 
 	TP_STRUCT__entry(
 		__field(dev_t,	dev)
-		__field(bool,	is_umount)
+		__field(int,	reason)
 		__field(char *,	msg)
 	),
 
 	TP_fast_assign(
 		__entry->dev		= sb->s_dev;
-		__entry->is_umount	= is_umount;
+		__entry->reason		= reason;
 		__entry->msg		= msg;
 	),
 
 	TP_printk("dev = (%d,%d), checkpoint for %s, state = %s",
 		show_dev(__entry),
-		__entry->is_umount ? "clean umount" : "consistency",
+		show_cpreason(__entry->reason),
 		__entry->msg)
 );
 
-- 
1.8.5.2 (Apple Git-48)


------------------------------------------------------------------------------
Meet PCI DSS 3.0 Compliance Requirements with EventLog Analyzer
Achieve PCI DSS 3.0 Compliant Status with Out-of-the-box PCI DSS Reports
Are you Audit-Ready for PCI DSS 3.0 Compliance? Download White paper
Comply to PCI DSS 3.0 Requirement 10 and 11.5 with EventLog Analyzer
http://pubads.g.doubleclick.net/gampad/clk?id=154622311&iu=/4140/ostg.clktrk

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/3] f2fs: introduce FITRIM in f2fs_ioctl
  2014-09-23  4:53 [PATCH 1/3] f2fs: introduce cp_control structure Jaegeuk Kim
@ 2014-09-23  4:53 ` Jaegeuk Kim
  2014-09-30  6:01   ` Chao Yu
  2014-09-23  4:53 ` [PATCH 3/3] f2fs: refactor flush_nat_entries to remove costly reorganizing ops Jaegeuk Kim
  2014-09-30  6:01 ` [PATCH 1/3] f2fs: introduce cp_control structure Chao Yu
  2 siblings, 1 reply; 9+ messages in thread
From: Jaegeuk Kim @ 2014-09-23  4:53 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

This patch introduces FITRIM in f2fs_ioctl.
In this case, f2fs will issue as many small discards and prefree discards
as possible for the given area.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c        |   4 +-
 fs/f2fs/f2fs.h              |   9 +++-
 fs/f2fs/file.c              |  29 ++++++++++++
 fs/f2fs/segment.c           | 110 +++++++++++++++++++++++++++++++++++++++-----
 fs/f2fs/super.c             |   1 +
 include/trace/events/f2fs.h |   3 +-
 6 files changed, 141 insertions(+), 15 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index e401ffd..5d793ba 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -997,7 +997,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
 	mutex_lock(&sbi->cp_mutex);
 
-	if (!sbi->s_dirty)
+	if (!sbi->s_dirty && cpc->reason != CP_DISCARD)
 		goto out;
 	if (unlikely(f2fs_cp_error(sbi)))
 		goto out;
@@ -1020,7 +1020,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
 	/* write cached NAT/SIT entries to NAT/SIT area */
 	flush_nat_entries(sbi);
-	flush_sit_entries(sbi);
+	flush_sit_entries(sbi, cpc);
 
 	/* unlock all the fs_lock[] in do_checkpoint() */
 	do_checkpoint(sbi, cpc);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 5298924..7b1e1d2 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -99,10 +99,15 @@ enum {
 enum {
 	CP_UMOUNT,
 	CP_SYNC,
+	CP_DISCARD,
 };
 
 struct cp_control {
 	int reason;
+	__u64 trim_start;
+	__u64 trim_end;
+	__u64 trim_minlen;
+	__u64 trimmed;
 };
 
 /*
@@ -1276,9 +1281,11 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *);
 void invalidate_blocks(struct f2fs_sb_info *, block_t);
 void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
 void clear_prefree_segments(struct f2fs_sb_info *);
+void release_discard_addrs(struct f2fs_sb_info *);
 void discard_next_dnode(struct f2fs_sb_info *, block_t);
 int npages_for_summary_flush(struct f2fs_sb_info *);
 void allocate_new_segments(struct f2fs_sb_info *);
+int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *);
 struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
 void write_meta_page(struct f2fs_sb_info *, struct page *);
 void write_node_page(struct f2fs_sb_info *, struct page *,
@@ -1295,7 +1302,7 @@ void write_data_summaries(struct f2fs_sb_info *, block_t);
 void write_node_summaries(struct f2fs_sb_info *, block_t);
 int lookup_journal_in_cursum(struct f2fs_summary_block *,
 					int, unsigned int, int);
-void flush_sit_entries(struct f2fs_sb_info *);
+void flush_sit_entries(struct f2fs_sb_info *, struct cp_control *);
 int build_segment_manager(struct f2fs_sb_info *);
 void destroy_segment_manager(struct f2fs_sb_info *);
 int __init create_segment_manager_caches(void);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index ac8c680..1184207 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -860,6 +860,35 @@ out:
 		mnt_drop_write_file(filp);
 		return ret;
 	}
+	case FITRIM:
+	{
+		struct super_block *sb = inode->i_sb;
+		struct request_queue *q = bdev_get_queue(sb->s_bdev);
+		struct fstrim_range range;
+		int ret = 0;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (!blk_queue_discard(q))
+			return -EOPNOTSUPP;
+
+		if (copy_from_user(&range, (struct fstrim_range __user *)arg,
+					sizeof(range)))
+			return -EFAULT;
+
+		range.minlen = max((unsigned int)range.minlen,
+				   q->limits.discard_granularity);
+		ret = f2fs_trim_fs(F2FS_SB(sb), &range);
+		if (ret < 0)
+			return ret;
+
+		if (copy_to_user((struct fstrim_range __user *)arg, &range,
+					sizeof(range)))
+			return -EFAULT;
+
+		return 0;
+	}
 	default:
 		return -ENOTTY;
 	}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 3125a3d..b423005 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -386,45 +386,92 @@ void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
 	}
 }
 
-static void add_discard_addrs(struct f2fs_sb_info *sbi,
-			unsigned int segno, struct seg_entry *se)
+static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 {
 	struct list_head *head = &SM_I(sbi)->discard_list;
 	struct discard_entry *new;
 	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
 	int max_blocks = sbi->blocks_per_seg;
+	struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
 	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
 	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
-	unsigned long dmap[entries];
+	unsigned long *dmap;
 	unsigned int start = 0, end = -1;
+	bool force = (cpc->reason == CP_DISCARD);
 	int i;
 
-	if (!test_opt(sbi, DISCARD))
+	if (!force && !test_opt(sbi, DISCARD))
 		return;
 
+	if (force && !se->valid_blocks) {
+		struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+		/*
+		 * if this segment is registered in the prefree list, then
+		 * we should skip adding a discard candidate, and let the
+		 * checkpoint do that later.
+		 */
+		mutex_lock(&dirty_i->seglist_lock);
+		if (test_bit(cpc->trim_start, dirty_i->dirty_segmap[PRE])) {
+			mutex_unlock(&dirty_i->seglist_lock);
+			cpc->trimmed += sbi->blocks_per_seg;
+			return;
+		}
+		mutex_unlock(&dirty_i->seglist_lock);
+
+		new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
+		INIT_LIST_HEAD(&new->list);
+		new->blkaddr = START_BLOCK(sbi, cpc->trim_start);
+		new->len = sbi->blocks_per_seg;
+		list_add_tail(&new->list, head);
+		SM_I(sbi)->nr_discards += sbi->blocks_per_seg;
+		cpc->trimmed += sbi->blocks_per_seg;
+		return;
+	}
+
 	/* zero block will be discarded through the prefree list */
 	if (!se->valid_blocks || se->valid_blocks == max_blocks)
 		return;
 
+	dmap = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
+	if (!dmap)
+		return;
+
 	/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
 	for (i = 0; i < entries; i++)
 		dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
 
-	while (SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
+	while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
 		start = __find_rev_next_bit(dmap, max_blocks, end + 1);
 		if (start >= max_blocks)
 			break;
 
 		end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
 
+		if (end - start < cpc->trim_minlen)
+			continue;
+
 		new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
 		INIT_LIST_HEAD(&new->list);
-		new->blkaddr = START_BLOCK(sbi, segno) + start;
+		new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start;
 		new->len = end - start;
+		cpc->trimmed += end - start;
 
 		list_add_tail(&new->list, head);
 		SM_I(sbi)->nr_discards += end - start;
 	}
+	kfree(dmap);
+}
+
+void release_discard_addrs(struct f2fs_sb_info *sbi)
+{
+	struct list_head *head = &(SM_I(sbi)->discard_list);
+	struct discard_entry *entry, *this;
+
+	/* drop caches */
+	list_for_each_entry_safe(entry, this, head, list) {
+		list_del(&entry->list);
+		kmem_cache_free(discard_entry_slab, entry);
+	}
 }
 
 /*
@@ -897,6 +944,40 @@ static const struct segment_allocation default_salloc_ops = {
 	.allocate_segment = allocate_segment_by_default,
 };
 
+int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
+{
+	block_t start_addr = SM_I(sbi)->main_blkaddr;
+	__u64 start = range->start >> sbi->log_blocksize;
+	__u64 end = start + (range->len >> sbi->log_blocksize) - 1;
+	__u64 segment = 1 << (sbi->log_blocksize + sbi->log_blocks_per_seg);
+	unsigned int start_segno, end_segno;
+	struct cp_control cpc;
+
+	if (range->minlen > segment || start >= TOTAL_BLKS(sbi) ||
+			range->len < sbi->blocksize)
+		return -EINVAL;
+
+	if (end <= start_addr)
+		goto out;
+
+	/* start/end segment number in main_area */
+	start_segno = (start <= start_addr) ? 0 : GET_SEGNO(sbi, start);
+	end_segno = (end >= TOTAL_BLKS(sbi)) ? TOTAL_SEGS(sbi) - 1 :
+		GET_SEGNO(sbi, end);
+
+	cpc.reason = CP_DISCARD;
+	cpc.trim_start = start_segno;
+	cpc.trim_end = end_segno;
+	cpc.trim_minlen = range->minlen >> sbi->log_blocksize;
+	cpc.trimmed = 0;
+
+	/* do checkpoint to issue discard commands safely */
+	write_checkpoint(sbi, &cpc);
+out:
+	range->len = cpc.trimmed << sbi->log_blocksize;
+	return 0;
+}
+
 static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
 {
 	struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -1524,7 +1605,7 @@ static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
  * CP calls this function, which flushes SIT entries including sit_journal,
  * and moves prefree segs to free segs.
  */
-void flush_sit_entries(struct f2fs_sb_info *sbi)
+void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 {
 	struct sit_info *sit_i = SIT_I(sbi);
 	unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
@@ -1534,6 +1615,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi)
 	struct list_head *head = &SM_I(sbi)->sit_entry_set;
 	unsigned long nsegs = TOTAL_SEGS(sbi);
 	bool to_journal = true;
+	struct seg_entry *se;
 
 	mutex_lock(&curseg->curseg_mutex);
 	mutex_lock(&sit_i->sentry_lock);
@@ -1580,11 +1662,14 @@ void flush_sit_entries(struct f2fs_sb_info *sbi)
 		/* flush dirty sit entries in region of current sit set */
 		for_each_set_bit_from(segno, bitmap, end) {
 			int offset, sit_offset;
-			struct seg_entry *se = get_seg_entry(sbi, segno);
+
+			se = get_seg_entry(sbi, segno);
 
 			/* add discard candidates */
-			if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards)
-				add_discard_addrs(sbi, segno, se);
+			if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) {
+				cpc->trim_start = segno;
+				add_discard_addrs(sbi, cpc);
+			}
 
 			if (to_journal) {
 				offset = lookup_journal_in_cursum(sum,
@@ -1614,8 +1699,11 @@ void flush_sit_entries(struct f2fs_sb_info *sbi)
 
 	f2fs_bug_on(sbi, !list_empty(head));
 	f2fs_bug_on(sbi, sit_i->dirty_sentries);
-
 out:
+	if (cpc->reason == CP_DISCARD) {
+		for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
+			add_discard_addrs(sbi, cpc);
+	}
 	mutex_unlock(&sit_i->sentry_lock);
 	mutex_unlock(&curseg->curseg_mutex);
 
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 128c420..bb6b568 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -446,6 +446,7 @@ static void f2fs_put_super(struct super_block *sb)
 	 * In addition, EIO will skip do checkpoint, we need this as well.
 	 */
 	release_dirty_inode(sbi);
+	release_discard_addrs(sbi);
 
 	iput(sbi->node_inode);
 	iput(sbi->meta_inode);
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 66eaace..bbc4de9 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -72,7 +72,8 @@
 #define show_cpreason(type)						\
 	__print_symbolic(type,						\
 		{ CP_UMOUNT,	"Umount" },				\
-		{ CP_SYNC,	"Sync" })
+		{ CP_SYNC,	"Sync" },				\
+		{ CP_DISCARD,	"Discard" })
 
 struct victim_sel_policy;
 
-- 
1.8.5.2 (Apple Git-48)


------------------------------------------------------------------------------
Meet PCI DSS 3.0 Compliance Requirements with EventLog Analyzer
Achieve PCI DSS 3.0 Compliant Status with Out-of-the-box PCI DSS Reports
Are you Audit-Ready for PCI DSS 3.0 Compliance? Download White paper
Comply to PCI DSS 3.0 Requirement 10 and 11.5 with EventLog Analyzer
http://pubads.g.doubleclick.net/gampad/clk?id=154622311&iu=/4140/ostg.clktrk

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 3/3] f2fs: refactor flush_nat_entries to remove costly reorganizing ops
  2014-09-23  4:53 [PATCH 1/3] f2fs: introduce cp_control structure Jaegeuk Kim
  2014-09-23  4:53 ` [PATCH 2/3] f2fs: introduce FITRIM in f2fs_ioctl Jaegeuk Kim
@ 2014-09-23  4:53 ` Jaegeuk Kim
  2014-09-30  6:04   ` [f2fs-dev] " Chao Yu
  2014-10-11 13:11   ` Sasha Levin
  2014-09-30  6:01 ` [PATCH 1/3] f2fs: introduce cp_control structure Chao Yu
  2 siblings, 2 replies; 9+ messages in thread
From: Jaegeuk Kim @ 2014-09-23  4:53 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-f2fs-devel; +Cc: Jaegeuk Kim

Previously, f2fs tried to reorganize the dirty nat entries into multiple sets
according to their nid ranges. This can improve the flushing of nat pages;
however, if there are a lot of cached nat entries, it becomes a bottleneck.

This patch introduces a new set management flow by removing dirty nat list and
adding a series of set operations when the nat entry becomes dirty.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/f2fs.h |  13 +--
 fs/f2fs/node.c | 299 +++++++++++++++++++++++++++++----------------------------
 fs/f2fs/node.h |   9 +-
 3 files changed, 162 insertions(+), 159 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 7b1e1d2..94cfdc4 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -164,6 +164,9 @@ struct fsync_inode_entry {
 #define sit_in_journal(sum, i)		(sum->sit_j.entries[i].se)
 #define segno_in_journal(sum, i)	(sum->sit_j.entries[i].segno)
 
+#define MAX_NAT_JENTRIES(sum)	(NAT_JOURNAL_ENTRIES - nats_in_cursum(sum))
+#define MAX_SIT_JENTRIES(sum)	(SIT_JOURNAL_ENTRIES - sits_in_cursum(sum))
+
 static inline int update_nats_in_cursum(struct f2fs_summary_block *rs, int i)
 {
 	int before = nats_in_cursum(rs);
@@ -182,9 +185,8 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
 								int type)
 {
 	if (type == NAT_JOURNAL)
-		return nats_in_cursum(sum) + size <= NAT_JOURNAL_ENTRIES;
-
-	return sits_in_cursum(sum) + size <= SIT_JOURNAL_ENTRIES;
+		return size <= MAX_NAT_JENTRIES(sum);
+	return size <= MAX_SIT_JENTRIES(sum);
 }
 
 /*
@@ -292,11 +294,10 @@ struct f2fs_nm_info {
 
 	/* NAT cache management */
 	struct radix_tree_root nat_root;/* root of the nat entry cache */
+	struct radix_tree_root nat_set_root;/* root of the nat set cache */
 	rwlock_t nat_tree_lock;		/* protect nat_tree_lock */
-	unsigned int nat_cnt;		/* the # of cached nat entries */
 	struct list_head nat_entries;	/* cached nat entry list (clean) */
-	struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */
-	struct list_head nat_entry_set;	/* nat entry set list */
+	unsigned int nat_cnt;		/* the # of cached nat entries */
 	unsigned int dirty_nat_cnt;	/* total num of nat entries in set */
 
 	/* free node ids management */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 21ed91b..f5a21f4 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -123,6 +123,57 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
 	kmem_cache_free(nat_entry_slab, e);
 }
 
+static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
+						struct nat_entry *ne)
+{
+	nid_t set = ne->ni.nid / NAT_ENTRY_PER_BLOCK;
+	struct nat_entry_set *head;
+
+	if (get_nat_flag(ne, IS_DIRTY))
+		return;
+retry:
+	head = radix_tree_lookup(&nm_i->nat_set_root, set);
+	if (!head) {
+		head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);
+
+		INIT_LIST_HEAD(&head->entry_list);
+		INIT_LIST_HEAD(&head->set_list);
+		head->set = set;
+		head->entry_cnt = 0;
+
+		if (radix_tree_insert(&nm_i->nat_set_root, set, head)) {
+			cond_resched();
+			goto retry;
+		}
+	}
+	list_move_tail(&ne->list, &head->entry_list);
+	nm_i->dirty_nat_cnt++;
+	head->entry_cnt++;
+	set_nat_flag(ne, IS_DIRTY, true);
+}
+
+static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
+						struct nat_entry *ne)
+{
+	nid_t set = ne->ni.nid / NAT_ENTRY_PER_BLOCK;
+	struct nat_entry_set *head;
+
+	head = radix_tree_lookup(&nm_i->nat_set_root, set);
+	if (head) {
+		list_move_tail(&ne->list, &nm_i->nat_entries);
+		set_nat_flag(ne, IS_DIRTY, false);
+		head->entry_cnt--;
+		nm_i->dirty_nat_cnt--;
+	}
+}
+
+static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
+		nid_t start, unsigned int nr, struct nat_entry_set **ep)
+{
+	return radix_tree_gang_lookup(&nm_i->nat_set_root, (void **)ep,
+							start, nr);
+}
+
 bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -1739,79 +1790,6 @@ skip:
 	return err;
 }
 
-static struct nat_entry_set *grab_nat_entry_set(void)
-{
-	struct nat_entry_set *nes =
-			f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);
-
-	nes->entry_cnt = 0;
-	INIT_LIST_HEAD(&nes->set_list);
-	INIT_LIST_HEAD(&nes->entry_list);
-	return nes;
-}
-
-static void release_nat_entry_set(struct nat_entry_set *nes,
-						struct f2fs_nm_info *nm_i)
-{
-	nm_i->dirty_nat_cnt -= nes->entry_cnt;
-	list_del(&nes->set_list);
-	kmem_cache_free(nat_entry_set_slab, nes);
-}
-
-static void adjust_nat_entry_set(struct nat_entry_set *nes,
-						struct list_head *head)
-{
-	struct nat_entry_set *next = nes;
-
-	if (list_is_last(&nes->set_list, head))
-		return;
-
-	list_for_each_entry_continue(next, head, set_list)
-		if (nes->entry_cnt <= next->entry_cnt)
-			break;
-
-	list_move_tail(&nes->set_list, &next->set_list);
-}
-
-static void add_nat_entry(struct nat_entry *ne, struct list_head *head)
-{
-	struct nat_entry_set *nes;
-	nid_t start_nid = START_NID(ne->ni.nid);
-
-	list_for_each_entry(nes, head, set_list) {
-		if (nes->start_nid == start_nid) {
-			list_move_tail(&ne->list, &nes->entry_list);
-			nes->entry_cnt++;
-			adjust_nat_entry_set(nes, head);
-			return;
-		}
-	}
-
-	nes = grab_nat_entry_set();
-
-	nes->start_nid = start_nid;
-	list_move_tail(&ne->list, &nes->entry_list);
-	nes->entry_cnt++;
-	list_add(&nes->set_list, head);
-}
-
-static void merge_nats_in_set(struct f2fs_sb_info *sbi)
-{
-	struct f2fs_nm_info *nm_i = NM_I(sbi);
-	struct list_head *dirty_list = &nm_i->dirty_nat_entries;
-	struct list_head *set_list = &nm_i->nat_entry_set;
-	struct nat_entry *ne, *tmp;
-
-	write_lock(&nm_i->nat_tree_lock);
-	list_for_each_entry_safe(ne, tmp, dirty_list, list) {
-		if (nat_get_blkaddr(ne) == NEW_ADDR)
-			continue;
-		add_nat_entry(ne, set_list);
-		nm_i->dirty_nat_cnt++;
-	}
-	write_unlock(&nm_i->nat_tree_lock);
-}
-
 static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -1846,101 +1824,129 @@ found:
 	mutex_unlock(&curseg->curseg_mutex);
 }
 
-/*
- * This function is called during the checkpointing process.
- */
-void flush_nat_entries(struct f2fs_sb_info *sbi)
+static void __adjust_nat_entry_set(struct nat_entry_set *nes,
+						struct list_head *head, int max)
 {
-	struct f2fs_nm_info *nm_i = NM_I(sbi);
-	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
-	struct f2fs_summary_block *sum = curseg->sum_blk;
-	struct nat_entry_set *nes, *tmp;
-	struct list_head *head = &nm_i->nat_entry_set;
-	bool to_journal = true;
+	struct nat_entry_set *cur;
+	nid_t dirty_cnt = 0;
 
-	/* merge nat entries of dirty list to nat entry set temporarily */
-	merge_nats_in_set(sbi);
+	if (nes->entry_cnt >= max)
+		goto add_out;
 
-	/*
-	 * if there are no enough space in journal to store dirty nat
-	 * entries, remove all entries from journal and merge them
-	 * into nat entry set.
-	 */
-	if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) {
-		remove_nats_in_journal(sbi);
-
-		/*
-		 * merge nat entries of dirty list to nat entry set temporarily
-		 */
-		merge_nats_in_set(sbi);
+	list_for_each_entry(cur, head, set_list) {
+		dirty_cnt += cur->entry_cnt;
+		if (dirty_cnt > max)
+			break;
+		if (cur->entry_cnt >= nes->entry_cnt) {
+			list_add(&nes->set_list, cur->set_list.prev);
+			return;
+		}
 	}
+add_out:
+	list_add_tail(&nes->set_list, head);
+}
 
-	if (!nm_i->dirty_nat_cnt)
-		return;
+static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
+					struct nat_entry_set *set)
+{
+	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
+	struct f2fs_summary_block *sum = curseg->sum_blk;
+	nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK;
+	bool to_journal = true;
+	struct f2fs_nat_block *nat_blk;
+	struct nat_entry *ne, *cur;
+	struct page *page = NULL;
 
 	/*
 	 * there are two steps to flush nat entries:
 	 * #1, flush nat entries to journal in current hot data summary block.
 	 * #2, flush nat entries to nat page.
 	 */
-	list_for_each_entry_safe(nes, tmp, head, set_list) {
-		struct f2fs_nat_block *nat_blk;
-		struct nat_entry *ne, *cur;
-		struct page *page;
-		nid_t start_nid = nes->start_nid;
+	if (!__has_cursum_space(sum, set->entry_cnt, NAT_JOURNAL))
+		to_journal = false;
 
-		if (to_journal &&
-			!__has_cursum_space(sum, nes->entry_cnt, NAT_JOURNAL))
-			to_journal = false;
+	if (to_journal) {
+		mutex_lock(&curseg->curseg_mutex);
+	} else {
+		page = get_next_nat_page(sbi, start_nid);
+		nat_blk = page_address(page);
+		f2fs_bug_on(sbi, !nat_blk);
+	}
+
+	/* flush dirty nats in nat entry set */
+	list_for_each_entry_safe(ne, cur, &set->entry_list, list) {
+		struct f2fs_nat_entry *raw_ne;
+		nid_t nid = nat_get_nid(ne);
+		int offset;
 
 		if (to_journal) {
-			mutex_lock(&curseg->curseg_mutex);
+			offset = lookup_journal_in_cursum(sum,
+							NAT_JOURNAL, nid, 1);
+			f2fs_bug_on(sbi, offset < 0);
+			raw_ne = &nat_in_journal(sum, offset);
+			nid_in_journal(sum, offset) = cpu_to_le32(nid);
 		} else {
-			page = get_next_nat_page(sbi, start_nid);
-			nat_blk = page_address(page);
-			f2fs_bug_on(sbi, !nat_blk);
+			raw_ne = &nat_blk->entries[nid - start_nid];
 		}
+		raw_nat_from_node_info(raw_ne, &ne->ni);
 
-		/* flush dirty nats in nat entry set */
-		list_for_each_entry_safe(ne, cur, &nes->entry_list, list) {
-			struct f2fs_nat_entry *raw_ne;
-			nid_t nid = nat_get_nid(ne);
-			int offset;
+		write_lock(&NM_I(sbi)->nat_tree_lock);
+		nat_reset_flag(ne);
+		__clear_nat_cache_dirty(NM_I(sbi), ne);
+		write_unlock(&NM_I(sbi)->nat_tree_lock);
 
-			if (to_journal) {
-				offset = lookup_journal_in_cursum(sum,
-							NAT_JOURNAL, nid, 1);
-				f2fs_bug_on(sbi, offset < 0);
-				raw_ne = &nat_in_journal(sum, offset);
-				nid_in_journal(sum, offset) = cpu_to_le32(nid);
-			} else {
-				raw_ne = &nat_blk->entries[nid - start_nid];
-			}
-			raw_nat_from_node_info(raw_ne, &ne->ni);
+		if (nat_get_blkaddr(ne) == NULL_ADDR)
+			add_free_nid(sbi, nid, false);
+	}
 
-			if (nat_get_blkaddr(ne) == NULL_ADDR &&
-				add_free_nid(sbi, nid, false) <= 0) {
-				write_lock(&nm_i->nat_tree_lock);
-				__del_from_nat_cache(nm_i, ne);
-				write_unlock(&nm_i->nat_tree_lock);
-			} else {
-				write_lock(&nm_i->nat_tree_lock);
-				nat_reset_flag(ne);
-				__clear_nat_cache_dirty(nm_i, ne);
-				write_unlock(&nm_i->nat_tree_lock);
-			}
-		}
+	if (to_journal)
+		mutex_unlock(&curseg->curseg_mutex);
+	else
+		f2fs_put_page(page, 1);
 
-		if (to_journal)
-			mutex_unlock(&curseg->curseg_mutex);
-		else
-			f2fs_put_page(page, 1);
+	f2fs_bug_on(sbi, set->entry_cnt);
+	radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
+	kmem_cache_free(nat_entry_set_slab, set);
+}
 
-		f2fs_bug_on(sbi, !list_empty(&nes->entry_list));
-		release_nat_entry_set(nes, nm_i);
+/*
+ * This function is called during the checkpointing process.
+ */
+void flush_nat_entries(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
+	struct f2fs_summary_block *sum = curseg->sum_blk;
+	struct nat_entry_set *setvec[NATVEC_SIZE];
+	struct nat_entry_set *set, *tmp;
+	unsigned int found;
+	nid_t set_idx = 0;
+	LIST_HEAD(sets);
+
+	/*
+	 * if there are no enough space in journal to store dirty nat
+	 * entries, remove all entries from journal and merge them
+	 * into nat entry set.
+	 */
+	if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
+		remove_nats_in_journal(sbi);
+
+	if (!nm_i->dirty_nat_cnt)
+		return;
+
+	while ((found = __gang_lookup_nat_set(nm_i,
+					set_idx, NATVEC_SIZE, setvec))) {
+		unsigned idx;
+		set_idx = setvec[found - 1]->set + 1;
+		for (idx = 0; idx < found; idx++)
+			__adjust_nat_entry_set(setvec[idx], &sets,
+							MAX_NAT_JENTRIES(sum));
 	}
 
-	f2fs_bug_on(sbi, !list_empty(head));
+	/* flush dirty nats in nat entry set */
+	list_for_each_entry_safe(set, tmp, &sets, set_list)
+		__flush_nat_entry_set(sbi, set);
+
 	f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
 }
 
@@ -1968,9 +1974,8 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
 	INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
 	INIT_LIST_HEAD(&nm_i->free_nid_list);
 	INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
+	INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_ATOMIC);
 	INIT_LIST_HEAD(&nm_i->nat_entries);
-	INIT_LIST_HEAD(&nm_i->dirty_nat_entries);
-	INIT_LIST_HEAD(&nm_i->nat_entry_set);
 
 	mutex_init(&nm_i->build_lock);
 	spin_lock_init(&nm_i->free_nid_list_lock);
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index b8ba63c..bd826d9 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -43,6 +43,7 @@ enum {
 	IS_CHECKPOINTED,	/* is it checkpointed before? */
 	HAS_FSYNCED_INODE,	/* is the inode fsynced before? */
 	HAS_LAST_FSYNC,		/* has the latest node fsync mark? */
+	IS_DIRTY,		/* this nat entry is dirty? */
 };
 
 struct nat_entry {
@@ -60,10 +61,6 @@ struct nat_entry {
 #define nat_get_version(nat)		(nat->ni.version)
 #define nat_set_version(nat, v)		(nat->ni.version = v)
 
-#define __set_nat_cache_dirty(nm_i, ne)					\
-		list_move_tail(&ne->list, &nm_i->dirty_nat_entries);
-#define __clear_nat_cache_dirty(nm_i, ne)				\
-		list_move_tail(&ne->list, &nm_i->nat_entries);
 #define inc_node_version(version)	(++version)
 
 static inline void set_nat_flag(struct nat_entry *ne,
@@ -113,9 +110,9 @@ enum mem_type {
 };
 
 struct nat_entry_set {
-	struct list_head set_list;	/* link with all nat sets */
+	struct list_head set_list;	/* link with other nat sets */
 	struct list_head entry_list;	/* link with dirty nat entries */
-	nid_t start_nid;		/* start nid of nats in set */
+	nid_t set;			/* set number*/
 	unsigned int entry_cnt;		/* the # of nat entries in set */
 };
 
-- 
1.8.5.2 (Apple Git-48)


------------------------------------------------------------------------------
Meet PCI DSS 3.0 Compliance Requirements with EventLog Analyzer
Achieve PCI DSS 3.0 Compliant Status with Out-of-the-box PCI DSS Reports
Are you Audit-Ready for PCI DSS 3.0 Compliance? Download White paper
Comply to PCI DSS 3.0 Requirement 10 and 11.5 with EventLog Analyzer
http://pubads.g.doubleclick.net/gampad/clk?id=154622311&iu=/4140/ostg.clktrk

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/3] f2fs: introduce cp_control structure
  2014-09-23  4:53 [PATCH 1/3] f2fs: introduce cp_control structure Jaegeuk Kim
  2014-09-23  4:53 ` [PATCH 2/3] f2fs: introduce FITRIM in f2fs_ioctl Jaegeuk Kim
  2014-09-23  4:53 ` [PATCH 3/3] f2fs: refactor flush_nat_entries to remove costly reorganizing ops Jaegeuk Kim
@ 2014-09-30  6:01 ` Chao Yu
  2 siblings, 0 replies; 9+ messages in thread
From: Chao Yu @ 2014-09-30  6:01 UTC (permalink / raw)
  To: 'Jaegeuk Kim'; +Cc: linux-kernel, linux-f2fs-devel

> -----Original Message-----
> From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> Sent: Tuesday, September 23, 2014 12:53 PM
> To: linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org;
> linux-f2fs-devel@lists.sourceforge.net
> Cc: Jaegeuk Kim
> Subject: [f2fs-dev] [PATCH 1/3] f2fs: introduce cp_control structure
> 
> This patch add a new data structure to control checkpoint parameters.
> Currently, it presents the reason of checkpoint such as is_umount and normal
> sync.
> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>

Looks good!

Reviewed-by: Chao Yu <chao2.yu@samsung.com>



------------------------------------------------------------------------------
Meet PCI DSS 3.0 Compliance Requirements with EventLog Analyzer
Achieve PCI DSS 3.0 Compliant Status with Out-of-the-box PCI DSS Reports
Are you Audit-Ready for PCI DSS 3.0 Compliance? Download White paper
Comply to PCI DSS 3.0 Requirement 10 and 11.5 with EventLog Analyzer
http://pubads.g.doubleclick.net/gampad/clk?id=154622311&iu=/4140/ostg.clktrk

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/3] f2fs: introduce FITRIM in f2fs_ioctl
  2014-09-23  4:53 ` [PATCH 2/3] f2fs: introduce FITRIM in f2fs_ioctl Jaegeuk Kim
@ 2014-09-30  6:01   ` Chao Yu
  2014-10-01 15:07     ` Jaegeuk Kim
  0 siblings, 1 reply; 9+ messages in thread
From: Chao Yu @ 2014-09-30  6:01 UTC (permalink / raw)
  To: 'Jaegeuk Kim'; +Cc: linux-kernel, linux-f2fs-devel

> -----Original Message-----
> From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> Sent: Tuesday, September 23, 2014 12:53 PM
> To: linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org;
> linux-f2fs-devel@lists.sourceforge.net
> Cc: Jaegeuk Kim
> Subject: [f2fs-dev] [PATCH 2/3] f2fs: introduce FITRIM in f2fs_ioctl
> 
> This patch introduces FITRIM in f2fs_ioctl.
> In this case, f2fs will issue small discards and prefree discards as many as
> possible for the given area.
> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>

Looks good!

Reviewed-by: Chao Yu <chao2.yu@samsung.com>

> +
>  	/* zero block will be discarded through the prefree list */
>  	if (!se->valid_blocks || se->valid_blocks == max_blocks)
>  		return;
> 
> +	dmap = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);

We can afford a 64-byte allocation on the stack; how about altering dmap to a local
array to reduce memory pressure at that moment and avoid the delay of allocation?

> +	if (!dmap)
> +		return;
> +



------------------------------------------------------------------------------
Meet PCI DSS 3.0 Compliance Requirements with EventLog Analyzer
Achieve PCI DSS 3.0 Compliant Status with Out-of-the-box PCI DSS Reports
Are you Audit-Ready for PCI DSS 3.0 Compliance? Download White paper
Comply to PCI DSS 3.0 Requirement 10 and 11.5 with EventLog Analyzer
http://pubads.g.doubleclick.net/gampad/clk?id=154622311&iu=/4140/ostg.clktrk

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [f2fs-dev] [PATCH 3/3] f2fs: refactor flush_nat_entries to remove costly reorganizing ops
  2014-09-23  4:53 ` [PATCH 3/3] f2fs: refactor flush_nat_entries to remove costly reorganizing ops Jaegeuk Kim
@ 2014-09-30  6:04   ` Chao Yu
  2014-10-01 15:06     ` Jaegeuk Kim
  2014-10-11 13:11   ` Sasha Levin
  1 sibling, 1 reply; 9+ messages in thread
From: Chao Yu @ 2014-09-30  6:04 UTC (permalink / raw)
  To: 'Jaegeuk Kim'; +Cc: linux-kernel, linux-f2fs-devel

Hi Jaegeuk,

> -----Original Message-----
> From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> Sent: Tuesday, September 23, 2014 12:53 PM
> To: linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org;
> linux-f2fs-devel@lists.sourceforge.net
> Cc: Jaegeuk Kim
> Subject: [f2fs-dev] [PATCH 3/3] f2fs: refactor flush_nat_entries to remove costly reorganizing
> ops
> 
> Previously, f2fs tries to reorganize the dirty nat entries into multiple sets
> according to its nid ranges. This can improve the flushing nat pages, however,
> if there are a lot of cached nat entries, it becomes a bottleneck.
> 
> This patch introduces a new set management flow by removing dirty nat list and
> adding a series of set operations when the nat entry becomes dirty.
> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> ---
>  fs/f2fs/f2fs.h |  13 +--
>  fs/f2fs/node.c | 299 +++++++++++++++++++++++++++++----------------------------
>  fs/f2fs/node.h |   9 +-
>  3 files changed, 162 insertions(+), 159 deletions(-)
> 
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 7b1e1d2..94cfdc4 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -164,6 +164,9 @@ struct fsync_inode_entry {
>  #define sit_in_journal(sum, i)		(sum->sit_j.entries[i].se)
>  #define segno_in_journal(sum, i)	(sum->sit_j.entries[i].segno)
> 
> +#define MAX_NAT_JENTRIES(sum)	(NAT_JOURNAL_ENTRIES - nats_in_cursum(sum))
> +#define MAX_SIT_JENTRIES(sum)	(SIT_JOURNAL_ENTRIES - sits_in_cursum(sum))
> +
>  static inline int update_nats_in_cursum(struct f2fs_summary_block *rs, int i)
>  {
>  	int before = nats_in_cursum(rs);
> @@ -182,9 +185,8 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int
> size,
>  								int type)
>  {
>  	if (type == NAT_JOURNAL)
> -		return nats_in_cursum(sum) + size <= NAT_JOURNAL_ENTRIES;
> -
> -	return sits_in_cursum(sum) + size <= SIT_JOURNAL_ENTRIES;
> +		return size <= MAX_NAT_JENTRIES(sum);
> +	return size <= MAX_SIT_JENTRIES(sum);
>  }
> 
>  /*
> @@ -292,11 +294,10 @@ struct f2fs_nm_info {
> 
>  	/* NAT cache management */
>  	struct radix_tree_root nat_root;/* root of the nat entry cache */
> +	struct radix_tree_root nat_set_root;/* root of the nat set cache */
>  	rwlock_t nat_tree_lock;		/* protect nat_tree_lock */
> -	unsigned int nat_cnt;		/* the # of cached nat entries */
>  	struct list_head nat_entries;	/* cached nat entry list (clean) */
> -	struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */
> -	struct list_head nat_entry_set;	/* nat entry set list */
> +	unsigned int nat_cnt;		/* the # of cached nat entries */
>  	unsigned int dirty_nat_cnt;	/* total num of nat entries in set */
> 
>  	/* free node ids management */
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index 21ed91b..f5a21f4 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -123,6 +123,57 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct
> nat_entry *e)
>  	kmem_cache_free(nat_entry_slab, e);
>  }
> 
> +static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
> +						struct nat_entry *ne)
> +{
> +	nid_t set = ne->ni.nid / NAT_ENTRY_PER_BLOCK;

nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);

> +	struct nat_entry_set *head;
> +
> +	if (get_nat_flag(ne, IS_DIRTY))
> +		return;
> +retry:
> +	head = radix_tree_lookup(&nm_i->nat_set_root, set);
> +	if (!head) {
> +		head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);
> +
> +		INIT_LIST_HEAD(&head->entry_list);
> +		INIT_LIST_HEAD(&head->set_list);
> +		head->set = set;
> +		head->entry_cnt = 0;
> +
> +		if (radix_tree_insert(&nm_i->nat_set_root, set, head)) {
> +			cond_resched();
> +			goto retry;
> +		}
> +	}
> +	list_move_tail(&ne->list, &head->entry_list);
> +	nm_i->dirty_nat_cnt++;
> +	head->entry_cnt++;
> +	set_nat_flag(ne, IS_DIRTY, true);
> +}
> +
> +static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
> +						struct nat_entry *ne)
> +{
> +	nid_t set = ne->ni.nid / NAT_ENTRY_PER_BLOCK;
> +	struct nat_entry_set *head;
> +
> +	head = radix_tree_lookup(&nm_i->nat_set_root, set);
> +	if (head) {
> +		list_move_tail(&ne->list, &nm_i->nat_entries);
> +		set_nat_flag(ne, IS_DIRTY, false);
> +		head->entry_cnt--;
> +		nm_i->dirty_nat_cnt--;
> +	}
> +}
> +
> +static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
> +		nid_t start, unsigned int nr, struct nat_entry_set **ep)
> +{
> +	return radix_tree_gang_lookup(&nm_i->nat_set_root, (void **)ep,
> +							start, nr);
> +}
> +
>  bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
>  {
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> @@ -1739,79 +1790,6 @@ skip:
>  	return err;
>  }
> 
> -static struct nat_entry_set *grab_nat_entry_set(void)
> -{
> -	struct nat_entry_set *nes =
> -			f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);
> -
> -	nes->entry_cnt = 0;
> -	INIT_LIST_HEAD(&nes->set_list);
> -	INIT_LIST_HEAD(&nes->entry_list);
> -	return nes;
> -}
> -
> -static void release_nat_entry_set(struct nat_entry_set *nes,
> -						struct f2fs_nm_info *nm_i)
> -{
> -	nm_i->dirty_nat_cnt -= nes->entry_cnt;
> -	list_del(&nes->set_list);
> -	kmem_cache_free(nat_entry_set_slab, nes);
> -}
> -
> -static void adjust_nat_entry_set(struct nat_entry_set *nes,
> -						struct list_head *head)
> -{
> -	struct nat_entry_set *next = nes;
> -
> -	if (list_is_last(&nes->set_list, head))
> -		return;
> -
> -	list_for_each_entry_continue(next, head, set_list)
> -		if (nes->entry_cnt <= next->entry_cnt)
> -			break;
> -
> -	list_move_tail(&nes->set_list, &next->set_list);
> -}
> -
> -static void add_nat_entry(struct nat_entry *ne, struct list_head *head)
> -{
> -	struct nat_entry_set *nes;
> -	nid_t start_nid = START_NID(ne->ni.nid);
> -
> -	list_for_each_entry(nes, head, set_list) {
> -		if (nes->start_nid == start_nid) {
> -			list_move_tail(&ne->list, &nes->entry_list);
> -			nes->entry_cnt++;
> -			adjust_nat_entry_set(nes, head);
> -			return;
> -		}
> -	}
> -
> -	nes = grab_nat_entry_set();
> -
> -	nes->start_nid = start_nid;
> -	list_move_tail(&ne->list, &nes->entry_list);
> -	nes->entry_cnt++;
> -	list_add(&nes->set_list, head);
> -}
> -
> -static void merge_nats_in_set(struct f2fs_sb_info *sbi)
> -{
> -	struct f2fs_nm_info *nm_i = NM_I(sbi);
> -	struct list_head *dirty_list = &nm_i->dirty_nat_entries;
> -	struct list_head *set_list = &nm_i->nat_entry_set;
> -	struct nat_entry *ne, *tmp;
> -
> -	write_lock(&nm_i->nat_tree_lock);
> -	list_for_each_entry_safe(ne, tmp, dirty_list, list) {
> -		if (nat_get_blkaddr(ne) == NEW_ADDR)
> -			continue;

Shouldn't we move this condition judgment into __flush_nat_entry_set?

> -		add_nat_entry(ne, set_list);
> -		nm_i->dirty_nat_cnt++;
> -	}
> -	write_unlock(&nm_i->nat_tree_lock);
> -}
> -
>  static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
>  {
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> @@ -1846,101 +1824,129 @@ found:
>  	mutex_unlock(&curseg->curseg_mutex);
>  }
> 
> -/*
> - * This function is called during the checkpointing process.
> - */
> -void flush_nat_entries(struct f2fs_sb_info *sbi)
> +static void __adjust_nat_entry_set(struct nat_entry_set *nes,
> +						struct list_head *head, int max)
>  {
> -	struct f2fs_nm_info *nm_i = NM_I(sbi);
> -	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> -	struct f2fs_summary_block *sum = curseg->sum_blk;
> -	struct nat_entry_set *nes, *tmp;
> -	struct list_head *head = &nm_i->nat_entry_set;
> -	bool to_journal = true;
> +	struct nat_entry_set *cur;
> +	nid_t dirty_cnt = 0;
> 
> -	/* merge nat entries of dirty list to nat entry set temporarily */
> -	merge_nats_in_set(sbi);
> +	if (nes->entry_cnt >= max)
> +		goto add_out;
> 
> -	/*
> -	 * if there are no enough space in journal to store dirty nat
> -	 * entries, remove all entries from journal and merge them
> -	 * into nat entry set.
> -	 */
> -	if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) {
> -		remove_nats_in_journal(sbi);
> -
> -		/*
> -		 * merge nat entries of dirty list to nat entry set temporarily
> -		 */
> -		merge_nats_in_set(sbi);
> +	list_for_each_entry(cur, head, set_list) {
> +		dirty_cnt += cur->entry_cnt;
> +		if (dirty_cnt > max)
> +			break;

There seems to be a problem here. For example:
set no:		1	2	3	4	5	6	7
nat number:	4	3	2	1	1	1	1
max = 6
set list will be: 3--4--2--1--1--1--1
Then we will preferentially flush the nat sets with more entries in the set list,
resulting in a degradation.
We'd better not break out of the adjustment until we finish traversing the whole
sequentially sorted set list, to avoid the above condition; so how about removing
the code related to dirty_cnt?

Thanks,
Yu

> +		if (cur->entry_cnt >= nes->entry_cnt) {
> +			list_add(&nes->set_list, cur->set_list.prev);
> +			return;
> +		}
>  	}
> +add_out:
> +	list_add_tail(&nes->set_list, head);
> +}
> 
> -	if (!nm_i->dirty_nat_cnt)
> -		return;
> +static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> +					struct nat_entry_set *set)
> +{
> +	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> +	struct f2fs_summary_block *sum = curseg->sum_blk;
> +	nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK;
> +	bool to_journal = true;
> +	struct f2fs_nat_block *nat_blk;
> +	struct nat_entry *ne, *cur;
> +	struct page *page = NULL;
> 
>  	/*
>  	 * there are two steps to flush nat entries:
>  	 * #1, flush nat entries to journal in current hot data summary block.
>  	 * #2, flush nat entries to nat page.
>  	 */
> -	list_for_each_entry_safe(nes, tmp, head, set_list) {
> -		struct f2fs_nat_block *nat_blk;
> -		struct nat_entry *ne, *cur;
> -		struct page *page;
> -		nid_t start_nid = nes->start_nid;
> +	if (!__has_cursum_space(sum, set->entry_cnt, NAT_JOURNAL))
> +		to_journal = false;
> 
> -		if (to_journal &&
> -			!__has_cursum_space(sum, nes->entry_cnt, NAT_JOURNAL))
> -			to_journal = false;
> +	if (to_journal) {
> +		mutex_lock(&curseg->curseg_mutex);
> +	} else {
> +		page = get_next_nat_page(sbi, start_nid);
> +		nat_blk = page_address(page);
> +		f2fs_bug_on(sbi, !nat_blk);
> +	}
> +
> +	/* flush dirty nats in nat entry set */
> +	list_for_each_entry_safe(ne, cur, &set->entry_list, list) {
> +		struct f2fs_nat_entry *raw_ne;
> +		nid_t nid = nat_get_nid(ne);
> +		int offset;
> 
>  		if (to_journal) {
> -			mutex_lock(&curseg->curseg_mutex);
> +			offset = lookup_journal_in_cursum(sum,
> +							NAT_JOURNAL, nid, 1);
> +			f2fs_bug_on(sbi, offset < 0);
> +			raw_ne = &nat_in_journal(sum, offset);
> +			nid_in_journal(sum, offset) = cpu_to_le32(nid);
>  		} else {
> -			page = get_next_nat_page(sbi, start_nid);
> -			nat_blk = page_address(page);
> -			f2fs_bug_on(sbi, !nat_blk);
> +			raw_ne = &nat_blk->entries[nid - start_nid];
>  		}
> +		raw_nat_from_node_info(raw_ne, &ne->ni);
> 
> -		/* flush dirty nats in nat entry set */
> -		list_for_each_entry_safe(ne, cur, &nes->entry_list, list) {
> -			struct f2fs_nat_entry *raw_ne;
> -			nid_t nid = nat_get_nid(ne);
> -			int offset;
> +		write_lock(&NM_I(sbi)->nat_tree_lock);
> +		nat_reset_flag(ne);
> +		__clear_nat_cache_dirty(NM_I(sbi), ne);
> +		write_unlock(&NM_I(sbi)->nat_tree_lock);
> 
> -			if (to_journal) {
> -				offset = lookup_journal_in_cursum(sum,
> -							NAT_JOURNAL, nid, 1);
> -				f2fs_bug_on(sbi, offset < 0);
> -				raw_ne = &nat_in_journal(sum, offset);
> -				nid_in_journal(sum, offset) = cpu_to_le32(nid);
> -			} else {
> -				raw_ne = &nat_blk->entries[nid - start_nid];
> -			}
> -			raw_nat_from_node_info(raw_ne, &ne->ni);
> +		if (nat_get_blkaddr(ne) == NULL_ADDR)
> +			add_free_nid(sbi, nid, false);
> +	}
> 
> -			if (nat_get_blkaddr(ne) == NULL_ADDR &&
> -				add_free_nid(sbi, nid, false) <= 0) {
> -				write_lock(&nm_i->nat_tree_lock);
> -				__del_from_nat_cache(nm_i, ne);
> -				write_unlock(&nm_i->nat_tree_lock);
> -			} else {
> -				write_lock(&nm_i->nat_tree_lock);
> -				nat_reset_flag(ne);
> -				__clear_nat_cache_dirty(nm_i, ne);
> -				write_unlock(&nm_i->nat_tree_lock);
> -			}
> -		}
> +	if (to_journal)
> +		mutex_unlock(&curseg->curseg_mutex);
> +	else
> +		f2fs_put_page(page, 1);
> 
> -		if (to_journal)
> -			mutex_unlock(&curseg->curseg_mutex);
> -		else
> -			f2fs_put_page(page, 1);
> +	f2fs_bug_on(sbi, set->entry_cnt);
> +	radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
> +	kmem_cache_free(nat_entry_set_slab, set);
> +}
> 
> -		f2fs_bug_on(sbi, !list_empty(&nes->entry_list));
> -		release_nat_entry_set(nes, nm_i);
> +/*
> + * This function is called during the checkpointing process.
> + */
> +void flush_nat_entries(struct f2fs_sb_info *sbi)
> +{
> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
> +	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> +	struct f2fs_summary_block *sum = curseg->sum_blk;
> +	struct nat_entry_set *setvec[NATVEC_SIZE];
> +	struct nat_entry_set *set, *tmp;
> +	unsigned int found;
> +	nid_t set_idx = 0;
> +	LIST_HEAD(sets);
> +
> +	/*
> +	 * if there are no enough space in journal to store dirty nat
> +	 * entries, remove all entries from journal and merge them
> +	 * into nat entry set.
> +	 */
> +	if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
> +		remove_nats_in_journal(sbi);
> +
> +	if (!nm_i->dirty_nat_cnt)
> +		return;
> +
> +	while ((found = __gang_lookup_nat_set(nm_i,
> +					set_idx, NATVEC_SIZE, setvec))) {
> +		unsigned idx;
> +		set_idx = setvec[found - 1]->set + 1;
> +		for (idx = 0; idx < found; idx++)
> +			__adjust_nat_entry_set(setvec[idx], &sets,
> +							MAX_NAT_JENTRIES(sum));
>  	}
> 
> -	f2fs_bug_on(sbi, !list_empty(head));
> +	/* flush dirty nats in nat entry set */
> +	list_for_each_entry_safe(set, tmp, &sets, set_list)
> +		__flush_nat_entry_set(sbi, set);
> +
>  	f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
>  }
> 
> @@ -1968,9 +1974,8 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
>  	INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
>  	INIT_LIST_HEAD(&nm_i->free_nid_list);
>  	INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
> +	INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_ATOMIC);
>  	INIT_LIST_HEAD(&nm_i->nat_entries);
> -	INIT_LIST_HEAD(&nm_i->dirty_nat_entries);
> -	INIT_LIST_HEAD(&nm_i->nat_entry_set);
> 
>  	mutex_init(&nm_i->build_lock);
>  	spin_lock_init(&nm_i->free_nid_list_lock);
> diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
> index b8ba63c..bd826d9 100644
> --- a/fs/f2fs/node.h
> +++ b/fs/f2fs/node.h
> @@ -43,6 +43,7 @@ enum {
>  	IS_CHECKPOINTED,	/* is it checkpointed before? */
>  	HAS_FSYNCED_INODE,	/* is the inode fsynced before? */
>  	HAS_LAST_FSYNC,		/* has the latest node fsync mark? */
> +	IS_DIRTY,		/* this nat entry is dirty? */
>  };
> 
>  struct nat_entry {
> @@ -60,10 +61,6 @@ struct nat_entry {
>  #define nat_get_version(nat)		(nat->ni.version)
>  #define nat_set_version(nat, v)		(nat->ni.version = v)
> 
> -#define __set_nat_cache_dirty(nm_i, ne)					\
> -		list_move_tail(&ne->list, &nm_i->dirty_nat_entries);
> -#define __clear_nat_cache_dirty(nm_i, ne)				\
> -		list_move_tail(&ne->list, &nm_i->nat_entries);
>  #define inc_node_version(version)	(++version)
> 
>  static inline void set_nat_flag(struct nat_entry *ne,
> @@ -113,9 +110,9 @@ enum mem_type {
>  };
> 
>  struct nat_entry_set {
> -	struct list_head set_list;	/* link with all nat sets */
> +	struct list_head set_list;	/* link with other nat sets */
>  	struct list_head entry_list;	/* link with dirty nat entries */
> -	nid_t start_nid;		/* start nid of nats in set */
> +	nid_t set;			/* set number*/
>  	unsigned int entry_cnt;		/* the # of nat entries in set */
>  };
> 
> --
> 1.8.5.2 (Apple Git-48)
> 
> 
> ------------------------------------------------------------------------------
> Meet PCI DSS 3.0 Compliance Requirements with EventLog Analyzer
> Achieve PCI DSS 3.0 Compliant Status with Out-of-the-box PCI DSS Reports
> Are you Audit-Ready for PCI DSS 3.0 Compliance? Download White paper
> Comply to PCI DSS 3.0 Requirement 10 and 11.5 with EventLog Analyzer
> http://pubads.g.doubleclick.net/gampad/clk?id=154622311&iu=/4140/ostg.clktrk
> _______________________________________________
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [f2fs-dev] [PATCH 3/3] f2fs: refactor flush_nat_entries to remove costly reorganizing ops
  2014-09-30  6:04   ` [f2fs-dev] " Chao Yu
@ 2014-10-01 15:06     ` Jaegeuk Kim
  0 siblings, 0 replies; 9+ messages in thread
From: Jaegeuk Kim @ 2014-10-01 15:06 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-f2fs-devel

On Tue, Sep 30, 2014 at 02:04:20PM +0800, Chao Yu wrote:
> Hi Jaegeuk,
> 
> > -----Original Message-----
> > From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> > Sent: Tuesday, September 23, 2014 12:53 PM
> > To: linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org;
> > linux-f2fs-devel@lists.sourceforge.net
> > Cc: Jaegeuk Kim
> > Subject: [f2fs-dev] [PATCH 3/3] f2fs: refactor flush_nat_entries to remove costly reorganizing
> > ops
> > 
> > Previously, f2fs tries to reorganize the dirty nat entries into multiple sets
> > according to its nid ranges. This can improve the flushing nat pages, however,
> > if there are a lot of cached nat entries, it becomes a bottleneck.
> > 
> > This patch introduces a new set management flow by removing dirty nat list and
> > adding a series of set operations when the nat entry becomes dirty.
> > 
> > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> > ---
> >  fs/f2fs/f2fs.h |  13 +--
> >  fs/f2fs/node.c | 299 +++++++++++++++++++++++++++++----------------------------
> >  fs/f2fs/node.h |   9 +-
> >  3 files changed, 162 insertions(+), 159 deletions(-)
> > 
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index 7b1e1d2..94cfdc4 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -164,6 +164,9 @@ struct fsync_inode_entry {
> >  #define sit_in_journal(sum, i)		(sum->sit_j.entries[i].se)
> >  #define segno_in_journal(sum, i)	(sum->sit_j.entries[i].segno)
> > 
> > +#define MAX_NAT_JENTRIES(sum)	(NAT_JOURNAL_ENTRIES - nats_in_cursum(sum))
> > +#define MAX_SIT_JENTRIES(sum)	(SIT_JOURNAL_ENTRIES - sits_in_cursum(sum))
> > +
> >  static inline int update_nats_in_cursum(struct f2fs_summary_block *rs, int i)
> >  {
> >  	int before = nats_in_cursum(rs);
> > @@ -182,9 +185,8 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int
> > size,
> >  								int type)
> >  {
> >  	if (type == NAT_JOURNAL)
> > -		return nats_in_cursum(sum) + size <= NAT_JOURNAL_ENTRIES;
> > -
> > -	return sits_in_cursum(sum) + size <= SIT_JOURNAL_ENTRIES;
> > +		return size <= MAX_NAT_JENTRIES(sum);
> > +	return size <= MAX_SIT_JENTRIES(sum);
> >  }
> > 
> >  /*
> > @@ -292,11 +294,10 @@ struct f2fs_nm_info {
> > 
> >  	/* NAT cache management */
> >  	struct radix_tree_root nat_root;/* root of the nat entry cache */
> > +	struct radix_tree_root nat_set_root;/* root of the nat set cache */
> >  	rwlock_t nat_tree_lock;		/* protect nat_tree_lock */
> > -	unsigned int nat_cnt;		/* the # of cached nat entries */
> >  	struct list_head nat_entries;	/* cached nat entry list (clean) */
> > -	struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */
> > -	struct list_head nat_entry_set;	/* nat entry set list */
> > +	unsigned int nat_cnt;		/* the # of cached nat entries */
> >  	unsigned int dirty_nat_cnt;	/* total num of nat entries in set */
> > 
> >  	/* free node ids management */
> > diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> > index 21ed91b..f5a21f4 100644
> > --- a/fs/f2fs/node.c
> > +++ b/fs/f2fs/node.c
> > @@ -123,6 +123,57 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct
> > nat_entry *e)
> >  	kmem_cache_free(nat_entry_slab, e);
> >  }
> > 
> > +static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
> > +						struct nat_entry *ne)
> > +{
> > +	nid_t set = ne->ni.nid / NAT_ENTRY_PER_BLOCK;
> 
> nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
> 
> > +	struct nat_entry_set *head;
> > +
> > +	if (get_nat_flag(ne, IS_DIRTY))
> > +		return;
> > +retry:
> > +	head = radix_tree_lookup(&nm_i->nat_set_root, set);
> > +	if (!head) {
> > +		head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);
> > +
> > +		INIT_LIST_HEAD(&head->entry_list);
> > +		INIT_LIST_HEAD(&head->set_list);
> > +		head->set = set;
> > +		head->entry_cnt = 0;
> > +
> > +		if (radix_tree_insert(&nm_i->nat_set_root, set, head)) {
> > +			cond_resched();
> > +			goto retry;
> > +		}
> > +	}
> > +	list_move_tail(&ne->list, &head->entry_list);
> > +	nm_i->dirty_nat_cnt++;
> > +	head->entry_cnt++;
> > +	set_nat_flag(ne, IS_DIRTY, true);
> > +}
> > +
> > +static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
> > +						struct nat_entry *ne)
> > +{
> > +	nid_t set = ne->ni.nid / NAT_ENTRY_PER_BLOCK;
> > +	struct nat_entry_set *head;
> > +
> > +	head = radix_tree_lookup(&nm_i->nat_set_root, set);
> > +	if (head) {
> > +		list_move_tail(&ne->list, &nm_i->nat_entries);
> > +		set_nat_flag(ne, IS_DIRTY, false);
> > +		head->entry_cnt--;
> > +		nm_i->dirty_nat_cnt--;
> > +	}
> > +}
> > +
> > +static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
> > +		nid_t start, unsigned int nr, struct nat_entry_set **ep)
> > +{
> > +	return radix_tree_gang_lookup(&nm_i->nat_set_root, (void **)ep,
> > +							start, nr);
> > +}
> > +
> >  bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
> >  {
> >  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> > @@ -1739,79 +1790,6 @@ skip:
> >  	return err;
> >  }
> > 
> > -static struct nat_entry_set *grab_nat_entry_set(void)
> > -{
> > -	struct nat_entry_set *nes =
> > -			f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);
> > -
> > -	nes->entry_cnt = 0;
> > -	INIT_LIST_HEAD(&nes->set_list);
> > -	INIT_LIST_HEAD(&nes->entry_list);
> > -	return nes;
> > -}
> > -
> > -static void release_nat_entry_set(struct nat_entry_set *nes,
> > -						struct f2fs_nm_info *nm_i)
> > -{
> > -	nm_i->dirty_nat_cnt -= nes->entry_cnt;
> > -	list_del(&nes->set_list);
> > -	kmem_cache_free(nat_entry_set_slab, nes);
> > -}
> > -
> > -static void adjust_nat_entry_set(struct nat_entry_set *nes,
> > -						struct list_head *head)
> > -{
> > -	struct nat_entry_set *next = nes;
> > -
> > -	if (list_is_last(&nes->set_list, head))
> > -		return;
> > -
> > -	list_for_each_entry_continue(next, head, set_list)
> > -		if (nes->entry_cnt <= next->entry_cnt)
> > -			break;
> > -
> > -	list_move_tail(&nes->set_list, &next->set_list);
> > -}
> > -
> > -static void add_nat_entry(struct nat_entry *ne, struct list_head *head)
> > -{
> > -	struct nat_entry_set *nes;
> > -	nid_t start_nid = START_NID(ne->ni.nid);
> > -
> > -	list_for_each_entry(nes, head, set_list) {
> > -		if (nes->start_nid == start_nid) {
> > -			list_move_tail(&ne->list, &nes->entry_list);
> > -			nes->entry_cnt++;
> > -			adjust_nat_entry_set(nes, head);
> > -			return;
> > -		}
> > -	}
> > -
> > -	nes = grab_nat_entry_set();
> > -
> > -	nes->start_nid = start_nid;
> > -	list_move_tail(&ne->list, &nes->entry_list);
> > -	nes->entry_cnt++;
> > -	list_add(&nes->set_list, head);
> > -}
> > -
> > -static void merge_nats_in_set(struct f2fs_sb_info *sbi)
> > -{
> > -	struct f2fs_nm_info *nm_i = NM_I(sbi);
> > -	struct list_head *dirty_list = &nm_i->dirty_nat_entries;
> > -	struct list_head *set_list = &nm_i->nat_entry_set;
> > -	struct nat_entry *ne, *tmp;
> > -
> > -	write_lock(&nm_i->nat_tree_lock);
> > -	list_for_each_entry_safe(ne, tmp, dirty_list, list) {
> > -		if (nat_get_blkaddr(ne) == NEW_ADDR)
> > -			continue;
> 
> Shouldn't we move this condition judgment into __flush_nat_entry_set?
> 
> > -		add_nat_entry(ne, set_list);
> > -		nm_i->dirty_nat_cnt++;
> > -	}
> > -	write_unlock(&nm_i->nat_tree_lock);
> > -}
> > -
> >  static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
> >  {
> >  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> > @@ -1846,101 +1824,129 @@ found:
> >  	mutex_unlock(&curseg->curseg_mutex);
> >  }
> > 
> > -/*
> > - * This function is called during the checkpointing process.
> > - */
> > -void flush_nat_entries(struct f2fs_sb_info *sbi)
> > +static void __adjust_nat_entry_set(struct nat_entry_set *nes,
> > +						struct list_head *head, int max)
> >  {
> > -	struct f2fs_nm_info *nm_i = NM_I(sbi);
> > -	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> > -	struct f2fs_summary_block *sum = curseg->sum_blk;
> > -	struct nat_entry_set *nes, *tmp;
> > -	struct list_head *head = &nm_i->nat_entry_set;
> > -	bool to_journal = true;
> > +	struct nat_entry_set *cur;
> > +	nid_t dirty_cnt = 0;
> > 
> > -	/* merge nat entries of dirty list to nat entry set temporarily */
> > -	merge_nats_in_set(sbi);
> > +	if (nes->entry_cnt >= max)
> > +		goto add_out;
> > 
> > -	/*
> > -	 * if there are no enough space in journal to store dirty nat
> > -	 * entries, remove all entries from journal and merge them
> > -	 * into nat entry set.
> > -	 */
> > -	if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) {
> > -		remove_nats_in_journal(sbi);
> > -
> > -		/*
> > -		 * merge nat entries of dirty list to nat entry set temporarily
> > -		 */
> > -		merge_nats_in_set(sbi);
> > +	list_for_each_entry(cur, head, set_list) {
> > +		dirty_cnt += cur->entry_cnt;
> > +		if (dirty_cnt > max)
> > +			break;
> 
> Seems a problem here, For example:
> set no:		1	2	3	4	5	6	7
> nat number:	4	3	2	1	1	1	1
> max = 6
> set list will be: 3--4--2--1--1--1--1
> Then we will prior flush nat set with more entries in the set list, result in
> a degradation.
> We'd better not break the adjustment until we finish to traverse the whole
> sequential sorted set list to avoid above condition, so how about removing the
> codes relate to dirty_cnt?

Hi Chao,

Agreed.
I fixed that.
Thanks,

> 
> Thanks,
> Yu
> 
> > +		if (cur->entry_cnt >= nes->entry_cnt) {
> > +			list_add(&nes->set_list, cur->set_list.prev);
> > +			return;
> > +		}
> >  	}
> > +add_out:
> > +	list_add_tail(&nes->set_list, head);
> > +}
> > 
> > -	if (!nm_i->dirty_nat_cnt)
> > -		return;
> > +static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> > +					struct nat_entry_set *set)
> > +{
> > +	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> > +	struct f2fs_summary_block *sum = curseg->sum_blk;
> > +	nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK;
> > +	bool to_journal = true;
> > +	struct f2fs_nat_block *nat_blk;
> > +	struct nat_entry *ne, *cur;
> > +	struct page *page = NULL;
> > 
> >  	/*
> >  	 * there are two steps to flush nat entries:
> >  	 * #1, flush nat entries to journal in current hot data summary block.
> >  	 * #2, flush nat entries to nat page.
> >  	 */
> > -	list_for_each_entry_safe(nes, tmp, head, set_list) {
> > -		struct f2fs_nat_block *nat_blk;
> > -		struct nat_entry *ne, *cur;
> > -		struct page *page;
> > -		nid_t start_nid = nes->start_nid;
> > +	if (!__has_cursum_space(sum, set->entry_cnt, NAT_JOURNAL))
> > +		to_journal = false;
> > 
> > -		if (to_journal &&
> > -			!__has_cursum_space(sum, nes->entry_cnt, NAT_JOURNAL))
> > -			to_journal = false;
> > +	if (to_journal) {
> > +		mutex_lock(&curseg->curseg_mutex);
> > +	} else {
> > +		page = get_next_nat_page(sbi, start_nid);
> > +		nat_blk = page_address(page);
> > +		f2fs_bug_on(sbi, !nat_blk);
> > +	}
> > +
> > +	/* flush dirty nats in nat entry set */
> > +	list_for_each_entry_safe(ne, cur, &set->entry_list, list) {
> > +		struct f2fs_nat_entry *raw_ne;
> > +		nid_t nid = nat_get_nid(ne);
> > +		int offset;
> > 
> >  		if (to_journal) {
> > -			mutex_lock(&curseg->curseg_mutex);
> > +			offset = lookup_journal_in_cursum(sum,
> > +							NAT_JOURNAL, nid, 1);
> > +			f2fs_bug_on(sbi, offset < 0);
> > +			raw_ne = &nat_in_journal(sum, offset);
> > +			nid_in_journal(sum, offset) = cpu_to_le32(nid);
> >  		} else {
> > -			page = get_next_nat_page(sbi, start_nid);
> > -			nat_blk = page_address(page);
> > -			f2fs_bug_on(sbi, !nat_blk);
> > +			raw_ne = &nat_blk->entries[nid - start_nid];
> >  		}
> > +		raw_nat_from_node_info(raw_ne, &ne->ni);
> > 
> > -		/* flush dirty nats in nat entry set */
> > -		list_for_each_entry_safe(ne, cur, &nes->entry_list, list) {
> > -			struct f2fs_nat_entry *raw_ne;
> > -			nid_t nid = nat_get_nid(ne);
> > -			int offset;
> > +		write_lock(&NM_I(sbi)->nat_tree_lock);
> > +		nat_reset_flag(ne);
> > +		__clear_nat_cache_dirty(NM_I(sbi), ne);
> > +		write_unlock(&NM_I(sbi)->nat_tree_lock);
> > 
> > -			if (to_journal) {
> > -				offset = lookup_journal_in_cursum(sum,
> > -							NAT_JOURNAL, nid, 1);
> > -				f2fs_bug_on(sbi, offset < 0);
> > -				raw_ne = &nat_in_journal(sum, offset);
> > -				nid_in_journal(sum, offset) = cpu_to_le32(nid);
> > -			} else {
> > -				raw_ne = &nat_blk->entries[nid - start_nid];
> > -			}
> > -			raw_nat_from_node_info(raw_ne, &ne->ni);
> > +		if (nat_get_blkaddr(ne) == NULL_ADDR)
> > +			add_free_nid(sbi, nid, false);
> > +	}
> > 
> > -			if (nat_get_blkaddr(ne) == NULL_ADDR &&
> > -				add_free_nid(sbi, nid, false) <= 0) {
> > -				write_lock(&nm_i->nat_tree_lock);
> > -				__del_from_nat_cache(nm_i, ne);
> > -				write_unlock(&nm_i->nat_tree_lock);
> > -			} else {
> > -				write_lock(&nm_i->nat_tree_lock);
> > -				nat_reset_flag(ne);
> > -				__clear_nat_cache_dirty(nm_i, ne);
> > -				write_unlock(&nm_i->nat_tree_lock);
> > -			}
> > -		}
> > +	if (to_journal)
> > +		mutex_unlock(&curseg->curseg_mutex);
> > +	else
> > +		f2fs_put_page(page, 1);
> > 
> > -		if (to_journal)
> > -			mutex_unlock(&curseg->curseg_mutex);
> > -		else
> > -			f2fs_put_page(page, 1);
> > +	f2fs_bug_on(sbi, set->entry_cnt);
> > +	radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
> > +	kmem_cache_free(nat_entry_set_slab, set);
> > +}
> > 
> > -		f2fs_bug_on(sbi, !list_empty(&nes->entry_list));
> > -		release_nat_entry_set(nes, nm_i);
> > +/*
> > + * This function is called during the checkpointing process.
> > + */
> > +void flush_nat_entries(struct f2fs_sb_info *sbi)
> > +{
> > +	struct f2fs_nm_info *nm_i = NM_I(sbi);
> > +	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> > +	struct f2fs_summary_block *sum = curseg->sum_blk;
> > +	struct nat_entry_set *setvec[NATVEC_SIZE];
> > +	struct nat_entry_set *set, *tmp;
> > +	unsigned int found;
> > +	nid_t set_idx = 0;
> > +	LIST_HEAD(sets);
> > +
> > +	/*
> > +	 * if there are no enough space in journal to store dirty nat
> > +	 * entries, remove all entries from journal and merge them
> > +	 * into nat entry set.
> > +	 */
> > +	if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
> > +		remove_nats_in_journal(sbi);
> > +
> > +	if (!nm_i->dirty_nat_cnt)
> > +		return;
> > +
> > +	while ((found = __gang_lookup_nat_set(nm_i,
> > +					set_idx, NATVEC_SIZE, setvec))) {
> > +		unsigned idx;
> > +		set_idx = setvec[found - 1]->set + 1;
> > +		for (idx = 0; idx < found; idx++)
> > +			__adjust_nat_entry_set(setvec[idx], &sets,
> > +							MAX_NAT_JENTRIES(sum));
> >  	}
> > 
> > -	f2fs_bug_on(sbi, !list_empty(head));
> > +	/* flush dirty nats in nat entry set */
> > +	list_for_each_entry_safe(set, tmp, &sets, set_list)
> > +		__flush_nat_entry_set(sbi, set);
> > +
> >  	f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
> >  }
> > 
> > @@ -1968,9 +1974,8 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
> >  	INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
> >  	INIT_LIST_HEAD(&nm_i->free_nid_list);
> >  	INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
> > +	INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_ATOMIC);
> >  	INIT_LIST_HEAD(&nm_i->nat_entries);
> > -	INIT_LIST_HEAD(&nm_i->dirty_nat_entries);
> > -	INIT_LIST_HEAD(&nm_i->nat_entry_set);
> > 
> >  	mutex_init(&nm_i->build_lock);
> >  	spin_lock_init(&nm_i->free_nid_list_lock);
> > diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
> > index b8ba63c..bd826d9 100644
> > --- a/fs/f2fs/node.h
> > +++ b/fs/f2fs/node.h
> > @@ -43,6 +43,7 @@ enum {
> >  	IS_CHECKPOINTED,	/* is it checkpointed before? */
> >  	HAS_FSYNCED_INODE,	/* is the inode fsynced before? */
> >  	HAS_LAST_FSYNC,		/* has the latest node fsync mark? */
> > +	IS_DIRTY,		/* this nat entry is dirty? */
> >  };
> > 
> >  struct nat_entry {
> > @@ -60,10 +61,6 @@ struct nat_entry {
> >  #define nat_get_version(nat)		(nat->ni.version)
> >  #define nat_set_version(nat, v)		(nat->ni.version = v)
> > 
> > -#define __set_nat_cache_dirty(nm_i, ne)					\
> > -		list_move_tail(&ne->list, &nm_i->dirty_nat_entries);
> > -#define __clear_nat_cache_dirty(nm_i, ne)				\
> > -		list_move_tail(&ne->list, &nm_i->nat_entries);
> >  #define inc_node_version(version)	(++version)
> > 
> >  static inline void set_nat_flag(struct nat_entry *ne,
> > @@ -113,9 +110,9 @@ enum mem_type {
> >  };
> > 
> >  struct nat_entry_set {
> > -	struct list_head set_list;	/* link with all nat sets */
> > +	struct list_head set_list;	/* link with other nat sets */
> >  	struct list_head entry_list;	/* link with dirty nat entries */
> > -	nid_t start_nid;		/* start nid of nats in set */
> > +	nid_t set;			/* set number*/
> >  	unsigned int entry_cnt;		/* the # of nat entries in set */
> >  };
> > 
> > --
> > 1.8.5.2 (Apple Git-48)
> > 
> > 
> > ------------------------------------------------------------------------------
> > Meet PCI DSS 3.0 Compliance Requirements with EventLog Analyzer
> > Achieve PCI DSS 3.0 Compliant Status with Out-of-the-box PCI DSS Reports
> > Are you Audit-Ready for PCI DSS 3.0 Compliance? Download White paper
> > Comply to PCI DSS 3.0 Requirement 10 and 11.5 with EventLog Analyzer
> > http://pubads.g.doubleclick.net/gampad/clk?id=154622311&iu=/4140/ostg.clktrk
> > _______________________________________________
> > Linux-f2fs-devel mailing list
> > Linux-f2fs-devel@lists.sourceforge.net
> > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/3] f2fs: introduce FITRIM in f2fs_ioctl
  2014-09-30  6:01   ` Chao Yu
@ 2014-10-01 15:07     ` Jaegeuk Kim
  0 siblings, 0 replies; 9+ messages in thread
From: Jaegeuk Kim @ 2014-10-01 15:07 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-kernel, linux-f2fs-devel

On Tue, Sep 30, 2014 at 02:01:58PM +0800, Chao Yu wrote:
> > -----Original Message-----
> > From: Jaegeuk Kim [mailto:jaegeuk@kernel.org]
> > Sent: Tuesday, September 23, 2014 12:53 PM
> > To: linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org;
> > linux-f2fs-devel@lists.sourceforge.net
> > Cc: Jaegeuk Kim
> > Subject: [f2fs-dev] [PATCH 2/3] f2fs: introduce FITRIM in f2fs_ioctl
> > 
> > This patch introduces FITRIM in f2fs_ioctl.
> > In this case, f2fs will issue as many small discards and prefree discards as
> > possible for the given area.
> > 
> > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> 
> Looks good!
> 
> Reviewed-by: Chao Yu <chao2.yu@samsung.com>
> 
> > +
> >  	/* zero block will be discarded through the prefree list */
> >  	if (!se->valid_blocks || se->valid_blocks == max_blocks)
> >  		return;
> > 
> > +	dmap = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
> 
> We can afford a 64-byte allocation on the stack, so how about altering dmap to a
> local array to reduce memory pressure at that moment and avoid the delay of allocation?

It's not a big deal.
Fixed.
Thanks,

> 
> > +	if (!dmap)
> > +		return;
> > +

------------------------------------------------------------------------------
Meet PCI DSS 3.0 Compliance Requirements with EventLog Analyzer
Achieve PCI DSS 3.0 Compliant Status with Out-of-the-box PCI DSS Reports
Are you Audit-Ready for PCI DSS 3.0 Compliance? Download White paper
Comply to PCI DSS 3.0 Requirement 10 and 11.5 with EventLog Analyzer
http://pubads.g.doubleclick.net/gampad/clk?id=154622311&iu=/4140/ostg.clktrk

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 3/3] f2fs: refactor flush_nat_entries to remove costly reorganizing ops
  2014-09-23  4:53 ` [PATCH 3/3] f2fs: refactor flush_nat_entries to remove costly reorganizing ops Jaegeuk Kim
  2014-09-30  6:04   ` [f2fs-dev] " Chao Yu
@ 2014-10-11 13:11   ` Sasha Levin
  1 sibling, 0 replies; 9+ messages in thread
From: Sasha Levin @ 2014-10-11 13:11 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-fsdevel, linux-f2fs-devel

On 09/23/2014 12:53 AM, Jaegeuk Kim wrote:
> +static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
> +						struct nat_entry *ne)
> +{
> +	nid_t set = ne->ni.nid / NAT_ENTRY_PER_BLOCK;
> +	struct nat_entry_set *head;
> +
> +	if (get_nat_flag(ne, IS_DIRTY))
> +		return;
> +retry:
> +	head = radix_tree_lookup(&nm_i->nat_set_root, set);
> +	if (!head) {
> +		head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);

This is funny, you call f2fs_kmem_cache_alloc() here with GFP_ATOMIC because
of disabled preemption, but f2fs_kmem_cache_alloc() will attempt to
cond_resched() in case of failed allocations:

	retry:
	        entry = kmem_cache_alloc(cachep, flags);
	        if (!entry) {
	                cond_resched();
	                goto retry;
	        }

So in reality, f2fs_kmem_cache_alloc can't really work with GFP_ATOMIC,
and right now there are two different locations that call it with that
flag.


Thanks,
Sasha

------------------------------------------------------------------------------
Meet PCI DSS 3.0 Compliance Requirements with EventLog Analyzer
Achieve PCI DSS 3.0 Compliant Status with Out-of-the-box PCI DSS Reports
Are you Audit-Ready for PCI DSS 3.0 Compliance? Download White paper
Comply to PCI DSS 3.0 Requirement 10 and 11.5 with EventLog Analyzer
http://p.sf.net/sfu/Zoho

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2014-10-11 13:11 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-09-23  4:53 [PATCH 1/3] f2fs: introduce cp_control structure Jaegeuk Kim
2014-09-23  4:53 ` [PATCH 2/3] f2fs: introduce FITRIM in f2fs_ioctl Jaegeuk Kim
2014-09-30  6:01   ` Chao Yu
2014-10-01 15:07     ` Jaegeuk Kim
2014-09-23  4:53 ` [PATCH 3/3] f2fs: refactor flush_nat_entries to remove costly reorganizing ops Jaegeuk Kim
2014-09-30  6:04   ` [f2fs-dev] " Chao Yu
2014-10-01 15:06     ` Jaegeuk Kim
2014-10-11 13:11   ` Sasha Levin
2014-09-30  6:01 ` [PATCH 1/3] f2fs: introduce cp_control structure Chao Yu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).