All of lore.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Light Hsieh <Light.Hsieh@mediatek.com>,
	Chao Yu <yuchao0@huawei.com>, Jaegeuk Kim <jaegeuk@kernel.org>,
	Sasha Levin <sashal@kernel.org>
Subject: [PATCH 5.4 41/47] f2fs: avoid race condition for shrinker count
Date: Mon,  4 Jan 2021 16:57:40 +0100	[thread overview]
Message-ID: <20210104155707.716046859@linuxfoundation.org> (raw)
In-Reply-To: <20210104155705.740576914@linuxfoundation.org>

From: Jaegeuk Kim <jaegeuk@kernel.org>

[ Upstream commit a95ba66ac1457b76fe472c8e092ab1006271f16c ]

Light reported sometimes shinker gets nat_cnt < dirty_nat_cnt resulting in
wrong do_shinker work. Let's avoid to return insane overflowed value by adding
single tracking value.

Reported-by: Light Hsieh <Light.Hsieh@mediatek.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 fs/f2fs/checkpoint.c |  2 +-
 fs/f2fs/debug.c      | 11 ++++++-----
 fs/f2fs/f2fs.h       | 10 ++++++++--
 fs/f2fs/node.c       | 29 ++++++++++++++++++-----------
 fs/f2fs/node.h       |  4 ++--
 fs/f2fs/shrinker.c   |  4 +---
 6 files changed, 36 insertions(+), 24 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index c966ccc44c157..a57219c51c01a 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1596,7 +1596,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 			goto out;
 		}
 
-		if (NM_I(sbi)->dirty_nat_cnt == 0 &&
+		if (NM_I(sbi)->nat_cnt[DIRTY_NAT] == 0 &&
 				SIT_I(sbi)->dirty_sentries == 0 &&
 				prefree_segments(sbi) == 0) {
 			f2fs_flush_sit_entries(sbi, cpc);
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 9b0bedd82581b..d8d64447bc947 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -107,8 +107,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
 		si->node_pages = NODE_MAPPING(sbi)->nrpages;
 	if (sbi->meta_inode)
 		si->meta_pages = META_MAPPING(sbi)->nrpages;
-	si->nats = NM_I(sbi)->nat_cnt;
-	si->dirty_nats = NM_I(sbi)->dirty_nat_cnt;
+	si->nats = NM_I(sbi)->nat_cnt[TOTAL_NAT];
+	si->dirty_nats = NM_I(sbi)->nat_cnt[DIRTY_NAT];
 	si->sits = MAIN_SEGS(sbi);
 	si->dirty_sits = SIT_I(sbi)->dirty_sentries;
 	si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID];
@@ -254,9 +254,10 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
 	si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID] +
 				NM_I(sbi)->nid_cnt[PREALLOC_NID]) *
 				sizeof(struct free_nid);
-	si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry);
-	si->cache_mem += NM_I(sbi)->dirty_nat_cnt *
-					sizeof(struct nat_entry_set);
+	si->cache_mem += NM_I(sbi)->nat_cnt[TOTAL_NAT] *
+				sizeof(struct nat_entry);
+	si->cache_mem += NM_I(sbi)->nat_cnt[DIRTY_NAT] *
+				sizeof(struct nat_entry_set);
 	si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages);
 	for (i = 0; i < MAX_INO_ENTRY; i++)
 		si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 0ddc4a74b9d43..4ca3c2a0a0f5b 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -797,6 +797,13 @@ enum nid_state {
 	MAX_NID_STATE,
 };
 
+enum nat_state {
+	TOTAL_NAT,
+	DIRTY_NAT,
+	RECLAIMABLE_NAT,
+	MAX_NAT_STATE,
+};
+
 struct f2fs_nm_info {
 	block_t nat_blkaddr;		/* base disk address of NAT */
 	nid_t max_nid;			/* maximum possible node ids */
@@ -812,8 +819,7 @@ struct f2fs_nm_info {
 	struct rw_semaphore nat_tree_lock;	/* protect nat_tree_lock */
 	struct list_head nat_entries;	/* cached nat entry list (clean) */
 	spinlock_t nat_list_lock;	/* protect clean nat entry list */
-	unsigned int nat_cnt;		/* the # of cached nat entries */
-	unsigned int dirty_nat_cnt;	/* total num of nat entries in set */
+	unsigned int nat_cnt[MAX_NAT_STATE]; /* the # of cached nat entries */
 	unsigned int nat_blocks;	/* # of nat blocks */
 
 	/* free node ids management */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 3ac2a4b32375d..7ce33698ae381 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -62,8 +62,8 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type)
 				sizeof(struct free_nid)) >> PAGE_SHIFT;
 		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
 	} else if (type == NAT_ENTRIES) {
-		mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >>
-							PAGE_SHIFT;
+		mem_size = (nm_i->nat_cnt[TOTAL_NAT] *
+				sizeof(struct nat_entry)) >> PAGE_SHIFT;
 		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
 		if (excess_cached_nats(sbi))
 			res = false;
@@ -177,7 +177,8 @@ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i,
 	list_add_tail(&ne->list, &nm_i->nat_entries);
 	spin_unlock(&nm_i->nat_list_lock);
 
-	nm_i->nat_cnt++;
+	nm_i->nat_cnt[TOTAL_NAT]++;
+	nm_i->nat_cnt[RECLAIMABLE_NAT]++;
 	return ne;
 }
 
@@ -207,7 +208,8 @@ static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
 static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
 {
 	radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
-	nm_i->nat_cnt--;
+	nm_i->nat_cnt[TOTAL_NAT]--;
+	nm_i->nat_cnt[RECLAIMABLE_NAT]--;
 	__free_nat_entry(e);
 }
 
@@ -253,7 +255,8 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
 	if (get_nat_flag(ne, IS_DIRTY))
 		goto refresh_list;
 
-	nm_i->dirty_nat_cnt++;
+	nm_i->nat_cnt[DIRTY_NAT]++;
+	nm_i->nat_cnt[RECLAIMABLE_NAT]--;
 	set_nat_flag(ne, IS_DIRTY, true);
 refresh_list:
 	spin_lock(&nm_i->nat_list_lock);
@@ -273,7 +276,8 @@ static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
 
 	set_nat_flag(ne, IS_DIRTY, false);
 	set->entry_cnt--;
-	nm_i->dirty_nat_cnt--;
+	nm_i->nat_cnt[DIRTY_NAT]--;
+	nm_i->nat_cnt[RECLAIMABLE_NAT]++;
 }
 
 static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
@@ -2881,14 +2885,17 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	LIST_HEAD(sets);
 	int err = 0;
 
-	/* during unmount, let's flush nat_bits before checking dirty_nat_cnt */
+	/*
+	 * during unmount, let's flush nat_bits before checking
+	 * nat_cnt[DIRTY_NAT].
+	 */
 	if (enabled_nat_bits(sbi, cpc)) {
 		down_write(&nm_i->nat_tree_lock);
 		remove_nats_in_journal(sbi);
 		up_write(&nm_i->nat_tree_lock);
 	}
 
-	if (!nm_i->dirty_nat_cnt)
+	if (!nm_i->nat_cnt[DIRTY_NAT])
 		return 0;
 
 	down_write(&nm_i->nat_tree_lock);
@@ -2899,7 +2906,8 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	 * into nat entry set.
 	 */
 	if (enabled_nat_bits(sbi, cpc) ||
-		!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
+		!__has_cursum_space(journal,
+			nm_i->nat_cnt[DIRTY_NAT], NAT_JOURNAL))
 		remove_nats_in_journal(sbi);
 
 	while ((found = __gang_lookup_nat_set(nm_i,
@@ -3023,7 +3031,6 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
 						F2FS_RESERVED_NODE_NUM;
 	nm_i->nid_cnt[FREE_NID] = 0;
 	nm_i->nid_cnt[PREALLOC_NID] = 0;
-	nm_i->nat_cnt = 0;
 	nm_i->ram_thresh = DEF_RAM_THRESHOLD;
 	nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
 	nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD;
@@ -3160,7 +3167,7 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
 			__del_from_nat_cache(nm_i, natvec[idx]);
 		}
 	}
-	f2fs_bug_on(sbi, nm_i->nat_cnt);
+	f2fs_bug_on(sbi, nm_i->nat_cnt[TOTAL_NAT]);
 
 	/* destroy nat set cache */
 	nid = 0;
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index e05af5df56485..4a2e7eaf2b028 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -123,13 +123,13 @@ static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne,
 
 static inline bool excess_dirty_nats(struct f2fs_sb_info *sbi)
 {
-	return NM_I(sbi)->dirty_nat_cnt >= NM_I(sbi)->max_nid *
+	return NM_I(sbi)->nat_cnt[DIRTY_NAT] >= NM_I(sbi)->max_nid *
 					NM_I(sbi)->dirty_nats_ratio / 100;
 }
 
 static inline bool excess_cached_nats(struct f2fs_sb_info *sbi)
 {
-	return NM_I(sbi)->nat_cnt >= DEF_NAT_CACHE_THRESHOLD;
+	return NM_I(sbi)->nat_cnt[TOTAL_NAT] >= DEF_NAT_CACHE_THRESHOLD;
 }
 
 static inline bool excess_dirty_nodes(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index a467aca29cfef..3ceebaaee3840 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -18,9 +18,7 @@ static unsigned int shrinker_run_no;
 
 static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi)
 {
-	long count = NM_I(sbi)->nat_cnt - NM_I(sbi)->dirty_nat_cnt;
-
-	return count > 0 ? count : 0;
+	return NM_I(sbi)->nat_cnt[RECLAIMABLE_NAT];
 }
 
 static unsigned long __count_free_nids(struct f2fs_sb_info *sbi)
-- 
2.27.0




  parent reply	other threads:[~2021-01-04 16:01 UTC|newest]

Thread overview: 59+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-01-04 15:56 [PATCH 5.4 00/47] 5.4.87-rc1 review Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 01/47] net/sched: sch_taprio: reset child qdiscs before freeing them Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 02/47] md/raid10: initialize r10_bio->read_slot before use Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 03/47] thermal/drivers/cpufreq_cooling: Update cpufreq_state only if state has changed Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 04/47] ext4: prevent creating duplicate encrypted filenames Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 05/47] ubifs: " Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 06/47] f2fs: " Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 07/47] fscrypt: add fscrypt_is_nokey_name() Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 08/47] fscrypt: remove kernel-internal constants from UAPI header Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 09/47] vfio/pci: Move dummy_resources_list init in vfio_pci_probe() Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 10/47] btrfs: fix race when defragmenting leads to unnecessary IO Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 11/47] ext4: dont remount read-only with errors=continue on reboot Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 12/47] KVM: x86: avoid incorrect writes to host MSR_IA32_SPEC_CTRL Greg Kroah-Hartman
2021-02-26 11:03   ` Thomas Lamprecht
2021-02-26 11:27     ` Paolo Bonzini
2021-02-26 12:59       ` Greg Kroah-Hartman
2021-02-26 14:15         ` Paolo Bonzini
2021-02-26 14:18           ` Thomas Lamprecht
2021-02-26 14:21             ` Paolo Bonzini
2021-01-04 15:57 ` [PATCH 5.4 13/47] KVM: SVM: relax conditions for allowing MSR_IA32_SPEC_CTRL accesses Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 14/47] KVM: x86: reinstate vendor-agnostic check on SPEC_CTRL cpuid bits Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 15/47] powerpc/bitops: Fix possible undefined behaviour with fls() and fls64() Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 16/47] jffs2: Allow setting rp_size to zero during remounting Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 17/47] jffs2: Fix NULL pointer dereference in rp_size fs option parsing Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 18/47] scsi: block: Fix a race in the runtime power management code Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 19/47] uapi: move constants from <linux/kernel.h> to <linux/const.h> Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 20/47] tools headers UAPI: Sync linux/const.h with the kernel headers Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 21/47] null_blk: Fix zone size initialization Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 22/47] of: fix linker-section match-table corruption Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 23/47] cgroup: Fix memory leak when parsing multiple source parameters Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 24/47] scsi: cxgb4i: Fix TLS dependency Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 25/47] Bluetooth: hci_h5: close serdev device and free hu in h5_close Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 26/47] reiserfs: add check for an invalid ih_entry_count Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 27/47] misc: vmw_vmci: fix kernel info-leak by initializing dbells in vmci_ctx_get_chkpt_doorbells() Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 28/47] media: gp8psk: initialize stats at power control logic Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 29/47] f2fs: fix shift-out-of-bounds in sanity_check_raw_super() Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 30/47] ALSA: seq: Use bool for snd_seq_queue internal flags Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 31/47] ALSA: rawmidi: Access runtime->avail always in spinlock Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 32/47] bfs: dont use WARNING: string when its just info Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 33/47] fcntl: Fix potential deadlock in send_sig{io, urg}() Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 34/47] rtc: sun6i: Fix memleak in sun6i_rtc_clk_init Greg Kroah-Hartman
2021-01-06 13:07   ` Pavel Machek
2021-01-04 15:57 ` [PATCH 5.4 35/47] module: set MODULE_STATE_GOING state when a module fails to load Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 36/47] quota: Dont overflow quota file offsets Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 37/47] rtc: pl031: fix resource leak in pl031_probe Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 38/47] powerpc: sysdev: add missing iounmap() on error in mpic_msgr_probe() Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 39/47] i3c master: fix missing destroy_workqueue() on error in i3c_master_register Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 40/47] NFSv4: Fix a pNFS layout related use-after-free race when freeing the inode Greg Kroah-Hartman
2021-01-04 15:57 ` Greg Kroah-Hartman [this message]
2021-01-04 15:57 ` [PATCH 5.4 42/47] module: delay kobject uevent until after module init call Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 43/47] fs/namespace.c: WARN if mnt_count has become negative Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 44/47] um: ubd: Submit all data segments atomically Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 45/47] tick/sched: Remove bogus boot "safety" check Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 46/47] ALSA: pcm: Clear the full allocated memory at hw_params Greg Kroah-Hartman
2021-01-04 15:57 ` [PATCH 5.4 47/47] dm verity: skip verity work if I/O error when system is shutting down Greg Kroah-Hartman
2021-01-04 21:09 ` [PATCH 5.4 00/47] 5.4.87-rc1 review Jon Hunter
2021-01-05  6:07 ` Daniel Díaz
2021-01-05 16:39 ` Shuah Khan
2021-01-05 18:17 ` Guenter Roeck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210104155707.716046859@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=Light.Hsieh@mediatek.com \
    --cc=jaegeuk@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=sashal@kernel.org \
    --cc=stable@vger.kernel.org \
    --cc=yuchao0@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.