From: Chao Yu <yuchao0@huawei.com>
To: jaegeuk@kernel.org
Cc: chao@kernel.org, linux-kernel@vger.kernel.org,
linux-f2fs-devel@lists.sourceforge.net
Subject: [PATCH] f2fs: fix fdatasync
Date: Wed, 16 Nov 2016 20:12:11 +0800 [thread overview]
Message-ID: <20161116121211.11790-1-yuchao0@huawei.com> (raw)
In the two cases below, we can't guarantee data consistency:
a)
1. xfs_io "pwrite 0 4195328" "fsync"
2. xfs_io "pwrite 4195328 1024" "fdatasync"
3. godown
4. umount & mount
--> isize we updated before fdatasync won't be recovered
b)
1. xfs_io "pwrite -S 0xcc 0 4202496" "fsync"
2. xfs_io "fpunch 4194304 4096" "fdatasync"
3. godown
4. umount & mount
--> dnode we punched before fdatasync won't be recovered
The reason is that fdatasync is normally not aware of modifications to
a file's metadata, e.g. isize changes or dnode updates, so in ->fsync
we skip flushing node pages for the above cases, resulting in the
fdatasynced file being lost during recovery.
Introduce a global FDATASYNC_INO ino cache to track node changes;
fdatasync then decides whether to flush nodes depending on the state of
the ino cache.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
---
fs/f2fs/checkpoint.c | 13 ++++++++++++-
fs/f2fs/f2fs.h | 7 +++++++
fs/f2fs/file.c | 11 +++++++++--
fs/f2fs/node.c | 5 ++++-
4 files changed, 32 insertions(+), 4 deletions(-)
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 5039ed8..27d5679 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -464,12 +464,23 @@ bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
return e ? true : false;
}
+bool need_flush_nodes(struct f2fs_sb_info *sbi, nid_t ino)
+{
+ struct inode_management *im = &sbi->im[FDATASYNC_INO];
+ struct ino_entry *e;
+
+ spin_lock(&im->ino_lock);
+ e = radix_tree_lookup(&im->ino_root, ino);
+ spin_unlock(&im->ino_lock);
+ return e ? true : false;
+}
+
void release_ino_entry(struct f2fs_sb_info *sbi, bool all)
{
struct ino_entry *e, *tmp;
int i;
- for (i = all ? ORPHAN_INO: APPEND_INO; i <= UPDATE_INO; i++) {
+ for (i = all ? ORPHAN_INO: APPEND_INO; i < MAX_INO_ENTRY; i++) {
struct inode_management *im = &sbi->im[i];
spin_lock(&im->ino_lock);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index cf74ec6..0978c58 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -161,6 +161,7 @@ enum {
ORPHAN_INO, /* for orphan ino list */
APPEND_INO, /* for append ino list */
UPDATE_INO, /* for update ino list */
+ FDATASYNC_INO, /* need to flush nodes during fdatasync */
MAX_INO_ENTRY, /* max. list */
};
@@ -1695,6 +1696,7 @@ static inline void f2fs_i_links_write(struct inode *inode, bool inc)
f2fs_mark_inode_dirty_sync(inode, true);
}
+void add_ino_entry(struct f2fs_sb_info *, nid_t, int);
static inline void f2fs_i_blocks_write(struct inode *inode,
blkcnt_t diff, bool add)
{
@@ -1706,6 +1708,8 @@ static inline void f2fs_i_blocks_write(struct inode *inode,
f2fs_mark_inode_dirty_sync(inode, true);
if (clean || recover)
set_inode_flag(inode, FI_AUTO_RECOVER);
+
+ add_ino_entry(F2FS_I_SB(inode), inode->i_ino, FDATASYNC_INO);
}
static inline void f2fs_i_size_write(struct inode *inode, loff_t i_size)
@@ -1720,6 +1724,8 @@ static inline void f2fs_i_size_write(struct inode *inode, loff_t i_size)
f2fs_mark_inode_dirty_sync(inode, true);
if (clean || recover)
set_inode_flag(inode, FI_AUTO_RECOVER);
+
+ add_ino_entry(F2FS_I_SB(inode), inode->i_ino, FDATASYNC_INO);
}
static inline bool f2fs_skip_inode_update(struct inode *inode)
@@ -2150,6 +2156,7 @@ void add_ino_entry(struct f2fs_sb_info *, nid_t, int type);
void remove_ino_entry(struct f2fs_sb_info *, nid_t, int type);
void release_ino_entry(struct f2fs_sb_info *, bool);
bool exist_written_data(struct f2fs_sb_info *, nid_t, int);
+bool need_flush_nodes(struct f2fs_sb_info *, nid_t);
int f2fs_sync_inode_meta(struct f2fs_sb_info *);
int acquire_orphan_inode(struct f2fs_sb_info *);
void release_orphan_inode(struct f2fs_sb_info *);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 47b7b13..75017c2 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -209,8 +209,13 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
return ret;
}
- /* if the inode is dirty, let's recover all the time */
- if (!datasync && !f2fs_skip_inode_update(inode)) {
+ if (datasync) {
+ if (need_flush_nodes(sbi, ino)) {
+ f2fs_write_inode(inode, NULL);
+ goto go_write;
+ }
+ } else if (!f2fs_skip_inode_update(inode)) {
+ /* if the inode is dirty, let's recover all the time */
f2fs_write_inode(inode, NULL);
goto go_write;
}
@@ -276,6 +281,8 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
/* once recovery info is written, don't need to tack this */
remove_ino_entry(sbi, ino, APPEND_INO);
clear_inode_flag(inode, FI_APPEND_WRITE);
+
+ remove_ino_entry(sbi, ino, FDATASYNC_INO);
flush_out:
remove_ino_entry(sbi, ino, UPDATE_INO);
clear_inode_flag(inode, FI_UPDATE_WRITE);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 1b5b31a..0974d5b 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -62,7 +62,7 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
} else if (type == INO_ENTRIES) {
int i;
- for (i = 0; i <= UPDATE_INO; i++)
+ for (i = 0; i < MAX_INO_ENTRY; i++)
mem_size += (sbi->im[i].ino_num *
sizeof(struct ino_entry)) >> PAGE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
@@ -1670,6 +1670,9 @@ static int f2fs_set_node_page_dirty(struct page *page)
inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
SetPagePrivate(page);
f2fs_trace_pid(page);
+
+ add_ino_entry(F2FS_P_SB(page), ino_of_node(page),
+ FDATASYNC_INO);
return 1;
}
return 0;
--
2.8.2.311.gee88674
------------------------------------------------------------------------------
next reply other threads:[~2016-11-16 12:12 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-11-16 12:12 Chao Yu [this message]
2016-11-16 12:15 ` [PATCH] f2fs: fix fdatasync Christoph Hellwig
2016-11-17 1:13 ` Chao Yu
2016-11-17 2:30 ` Jaegeuk Kim
2016-11-16 19:13 ` Jaegeuk Kim
2016-11-17 2:51 ` Chao Yu
2016-11-17 2:59 ` Jaegeuk Kim
2016-11-17 3:35 ` Chao Yu
2016-11-17 3:41 ` Jaegeuk Kim
2016-11-17 12:12 ` Chao Yu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20161116121211.11790-1-yuchao0@huawei.com \
--to=yuchao0@huawei.com \
--cc=chao@kernel.org \
--cc=jaegeuk@kernel.org \
--cc=linux-f2fs-devel@lists.sourceforge.net \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).