* [f2fs-dev] [PATCH AUTOSEL 5.10 10/31] f2fs: Handle casefolding with Encryption
[not found] <20201230130314.3636961-1-sashal@kernel.org>
@ 2020-12-30 13:02 ` Sasha Levin
2020-12-30 18:01 ` Eric Biggers
2020-12-30 13:02 ` [f2fs-dev] [PATCH AUTOSEL 5.10 11/31] f2fs: avoid race condition for shrinker count Sasha Levin
` (2 subsequent siblings)
3 siblings, 1 reply; 6+ messages in thread
From: Sasha Levin @ 2020-12-30 13:02 UTC (permalink / raw)
To: linux-kernel, stable
Cc: Sasha Levin, Jaegeuk Kim, linux-f2fs-devel, Daniel Rosenberg,
Eric Biggers
From: Daniel Rosenberg <drosen@google.com>
[ Upstream commit 7ad08a58bf67594057362e45cbddd3e27e53e557 ]
Expand f2fs's casefolding support to include encrypted directories. To
index casefolded+encrypted directories, we use the SipHash of the
casefolded name, keyed by a key derived from the directory's fscrypt
master key. This ensures that the dirhash doesn't leak information
about the plaintext filenames.
Encryption keys are unavailable during roll-forward recovery, so we
can't compute the dirhash when recovering a new dentry in an encrypted +
casefolded directory. To avoid having to force a checkpoint when a new
file is fsync'ed, store the dirhash on-disk appended to i_name.
This patch incorporates work by Eric Biggers <ebiggers@google.com>
and Jaegeuk Kim <jaegeuk@kernel.org>.
Co-developed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Daniel Rosenberg <drosen@google.com>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
fs/f2fs/dir.c | 98 +++++++++++++++++++++++++++++++++++-----------
fs/f2fs/f2fs.h | 8 ++--
fs/f2fs/hash.c | 11 +++++-
fs/f2fs/inline.c | 4 ++
fs/f2fs/recovery.c | 12 +++++-
fs/f2fs/super.c | 6 ---
6 files changed, 106 insertions(+), 33 deletions(-)
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 4b9ef8bbfa4a9..4ee0b7070c157 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -5,6 +5,7 @@
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
*/
+#include <asm/unaligned.h>
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/sched/signal.h>
@@ -206,30 +207,55 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir,
/*
* Test whether a case-insensitive directory entry matches the filename
* being searched for.
+ *
+ * Returns 1 for a match, 0 for no match, and -errno on an error.
*/
-static bool f2fs_match_ci_name(const struct inode *dir, const struct qstr *name,
+static int f2fs_match_ci_name(const struct inode *dir, const struct qstr *name,
const u8 *de_name, u32 de_name_len)
{
const struct super_block *sb = dir->i_sb;
const struct unicode_map *um = sb->s_encoding;
+ struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len);
struct qstr entry = QSTR_INIT(de_name, de_name_len);
int res;
+ if (IS_ENCRYPTED(dir)) {
+ const struct fscrypt_str encrypted_name =
+ FSTR_INIT((u8 *)de_name, de_name_len);
+
+ if (WARN_ON_ONCE(!fscrypt_has_encryption_key(dir)))
+ return -EINVAL;
+
+ decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
+ if (!decrypted_name.name)
+ return -ENOMEM;
+ res = fscrypt_fname_disk_to_usr(dir, 0, 0, &encrypted_name,
+ &decrypted_name);
+ if (res < 0)
+ goto out;
+ entry.name = decrypted_name.name;
+ entry.len = decrypted_name.len;
+ }
+
res = utf8_strncasecmp_folded(um, name, &entry);
- if (res < 0) {
- /*
- * In strict mode, ignore invalid names. In non-strict mode,
- * fall back to treating them as opaque byte sequences.
- */
- if (sb_has_strict_encoding(sb) || name->len != entry.len)
- return false;
- return !memcmp(name->name, entry.name, name->len);
+ /*
+ * In strict mode, ignore invalid names. In non-strict mode,
+ * fall back to treating them as opaque byte sequences.
+ */
+ if (res < 0 && !sb_has_strict_encoding(sb)) {
+ res = name->len == entry.len &&
+ memcmp(name->name, entry.name, name->len) == 0;
+ } else {
+ /* utf8_strncasecmp_folded returns 0 on match */
+ res = (res == 0);
}
- return res == 0;
+out:
+ kfree(decrypted_name.name);
+ return res;
}
#endif /* CONFIG_UNICODE */
-static inline bool f2fs_match_name(const struct inode *dir,
+static inline int f2fs_match_name(const struct inode *dir,
const struct f2fs_filename *fname,
const u8 *de_name, u32 de_name_len)
{
@@ -256,6 +282,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
struct f2fs_dir_entry *de;
unsigned long bit_pos = 0;
int max_len = 0;
+ int res = 0;
if (max_slots)
*max_slots = 0;
@@ -273,10 +300,15 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
continue;
}
- if (de->hash_code == fname->hash &&
- f2fs_match_name(d->inode, fname, d->filename[bit_pos],
- le16_to_cpu(de->name_len)))
- goto found;
+ if (de->hash_code == fname->hash) {
+ res = f2fs_match_name(d->inode, fname,
+ d->filename[bit_pos],
+ le16_to_cpu(de->name_len));
+ if (res < 0)
+ return ERR_PTR(res);
+ if (res)
+ goto found;
+ }
if (max_slots && max_len > *max_slots)
*max_slots = max_len;
@@ -326,7 +358,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
}
de = find_in_block(dir, dentry_page, fname, &max_slots);
- if (de) {
+ if (IS_ERR(de)) {
+ *res_page = ERR_CAST(de);
+ de = NULL;
+ break;
+ } else if (de) {
*res_page = dentry_page;
break;
}
@@ -448,17 +484,39 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
f2fs_put_page(page, 1);
}
-static void init_dent_inode(const struct f2fs_filename *fname,
+static void init_dent_inode(struct inode *dir, struct inode *inode,
+ const struct f2fs_filename *fname,
struct page *ipage)
{
struct f2fs_inode *ri;
+ if (!fname) /* tmpfile case? */
+ return;
+
f2fs_wait_on_page_writeback(ipage, NODE, true, true);
/* copy name info. to this inode page */
ri = F2FS_INODE(ipage);
ri->i_namelen = cpu_to_le32(fname->disk_name.len);
memcpy(ri->i_name, fname->disk_name.name, fname->disk_name.len);
+ if (IS_ENCRYPTED(dir)) {
+ file_set_enc_name(inode);
+ /*
+ * Roll-forward recovery doesn't have encryption keys available,
+ * so it can't compute the dirhash for encrypted+casefolded
+ * filenames. Append it to i_name if possible. Else, disable
+ * roll-forward recovery of the dentry (i.e., make fsync'ing the
+ * file force a checkpoint) by setting LOST_PINO.
+ */
+ if (IS_CASEFOLDED(dir)) {
+ if (fname->disk_name.len + sizeof(f2fs_hash_t) <=
+ F2FS_NAME_LEN)
+ put_unaligned(fname->hash, (f2fs_hash_t *)
+ &ri->i_name[fname->disk_name.len]);
+ else
+ file_lost_pino(inode);
+ }
+ }
set_page_dirty(ipage);
}
@@ -541,11 +599,7 @@ struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir,
return page;
}
- if (fname) {
- init_dent_inode(fname, page);
- if (IS_ENCRYPTED(dir))
- file_set_enc_name(inode);
- }
+ init_dent_inode(dir, inode, fname, page);
/*
* This file should be checkpointed during fsync.
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 9a321c52facec..8cba43eb0b24a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -533,9 +533,11 @@ struct f2fs_filename {
#ifdef CONFIG_UNICODE
/*
* For casefolded directories: the casefolded name, but it's left NULL
- * if the original name is not valid Unicode or if the filesystem is
- * doing an internal operation where usr_fname is also NULL. In these
- * cases we fall back to treating the name as an opaque byte sequence.
+ * if the original name is not valid Unicode, if the directory is both
+ * casefolded and encrypted and its encryption key is unavailable, or if
+ * the filesystem is doing an internal operation where usr_fname is also
+ * NULL. In all these cases we fall back to treating the name as an
+ * opaque byte sequence.
*/
struct fscrypt_str cf_name;
#endif
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index de841aaf3c439..e3beac546c63a 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -111,7 +111,9 @@ void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname)
* If the casefolded name is provided, hash it instead of the
* on-disk name. If the casefolded name is *not* provided, that
* should only be because the name wasn't valid Unicode, so fall
- * back to treating the name as an opaque byte sequence.
+ * back to treating the name as an opaque byte sequence. Note
+ * that to handle encrypted directories, the fallback must use
+ * usr_fname (plaintext) rather than disk_name (ciphertext).
*/
WARN_ON_ONCE(!fname->usr_fname->name);
if (fname->cf_name.name) {
@@ -121,6 +123,13 @@ void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname)
name = fname->usr_fname->name;
len = fname->usr_fname->len;
}
+ if (IS_ENCRYPTED(dir)) {
+ struct qstr tmp = QSTR_INIT(name, len);
+
+ fname->hash =
+ cpu_to_le32(fscrypt_fname_siphash(dir, &tmp));
+ return;
+ }
}
#endif
fname->hash = cpu_to_le32(TEA_hash_name(name, len));
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 70384e31788db..92e9852d316ad 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -332,6 +332,10 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
make_dentry_ptr_inline(dir, &d, inline_dentry);
de = f2fs_find_target_dentry(&d, fname, NULL);
unlock_page(ipage);
+ if (IS_ERR(de)) {
+ *res_page = ERR_CAST(de);
+ de = NULL;
+ }
if (de)
*res_page = ipage;
else
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 4f12ade6410a1..0947d36af1a8e 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -5,6 +5,7 @@
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
*/
+#include <asm/unaligned.h>
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
@@ -128,7 +129,16 @@ static int init_recovered_filename(const struct inode *dir,
}
/* Compute the hash of the filename */
- if (IS_CASEFOLDED(dir)) {
+ if (IS_ENCRYPTED(dir) && IS_CASEFOLDED(dir)) {
+ /*
+ * In this case the hash isn't computable without the key, so it
+ * was saved on-disk.
+ */
+ if (fname->disk_name.len + sizeof(f2fs_hash_t) > F2FS_NAME_LEN)
+ return -EINVAL;
+ fname->hash = get_unaligned((f2fs_hash_t *)
+ &raw_inode->i_name[fname->disk_name.len]);
+ } else if (IS_CASEFOLDED(dir)) {
err = f2fs_init_casefolded_name(dir, fname);
if (err)
return err;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 00eff2f518079..ba859eeca69a3 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -3399,12 +3399,6 @@ static int f2fs_setup_casefold(struct f2fs_sb_info *sbi)
struct unicode_map *encoding;
__u16 encoding_flags;
- if (f2fs_sb_has_encrypt(sbi)) {
- f2fs_err(sbi,
- "Can't mount with encoding and encryption");
- return -EINVAL;
- }
-
if (f2fs_sb_read_encoding(sbi->raw_super, &encoding_info,
&encoding_flags)) {
f2fs_err(sbi,
--
2.27.0
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [f2fs-dev] [PATCH AUTOSEL 5.10 11/31] f2fs: avoid race condition for shrinker count
[not found] <20201230130314.3636961-1-sashal@kernel.org>
2020-12-30 13:02 ` [f2fs-dev] [PATCH AUTOSEL 5.10 10/31] f2fs: Handle casefolding with Encryption Sasha Levin
@ 2020-12-30 13:02 ` Sasha Levin
2020-12-30 13:02 ` [f2fs-dev] [PATCH AUTOSEL 5.10 12/31] f2fs: fix race of pending_pages in decompression Sasha Levin
2020-12-30 13:02 ` [f2fs-dev] [PATCH AUTOSEL 5.10 15/31] f2fs: fix shift-out-of-bounds in sanity_check_raw_super() Sasha Levin
3 siblings, 0 replies; 6+ messages in thread
From: Sasha Levin @ 2020-12-30 13:02 UTC (permalink / raw)
To: linux-kernel, stable
Cc: Sasha Levin, Light Hsieh, linux-f2fs-devel, linux-mediatek,
Jaegeuk Kim, linux-arm-kernel
From: Jaegeuk Kim <jaegeuk@kernel.org>
[ Upstream commit a95ba66ac1457b76fe472c8e092ab1006271f16c ]
Light reported sometimes shinker gets nat_cnt < dirty_nat_cnt resulting in
wrong do_shinker work. Let's avoid to return insane overflowed value by adding
single tracking value.
Reported-by: Light Hsieh <Light.Hsieh@mediatek.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
fs/f2fs/checkpoint.c | 2 +-
fs/f2fs/debug.c | 11 ++++++-----
fs/f2fs/f2fs.h | 10 ++++++++--
fs/f2fs/node.c | 29 ++++++++++++++++++-----------
fs/f2fs/node.h | 4 ++--
fs/f2fs/shrinker.c | 4 +---
6 files changed, 36 insertions(+), 24 deletions(-)
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 023462e80e58d..b39bf416d5114 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1600,7 +1600,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
goto out;
}
- if (NM_I(sbi)->dirty_nat_cnt == 0 &&
+ if (NM_I(sbi)->nat_cnt[DIRTY_NAT] == 0 &&
SIT_I(sbi)->dirty_sentries == 0 &&
prefree_segments(sbi) == 0) {
f2fs_flush_sit_entries(sbi, cpc);
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index a8357fd4f5fab..197c914119da8 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -145,8 +145,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->node_pages = NODE_MAPPING(sbi)->nrpages;
if (sbi->meta_inode)
si->meta_pages = META_MAPPING(sbi)->nrpages;
- si->nats = NM_I(sbi)->nat_cnt;
- si->dirty_nats = NM_I(sbi)->dirty_nat_cnt;
+ si->nats = NM_I(sbi)->nat_cnt[TOTAL_NAT];
+ si->dirty_nats = NM_I(sbi)->nat_cnt[DIRTY_NAT];
si->sits = MAIN_SEGS(sbi);
si->dirty_sits = SIT_I(sbi)->dirty_sentries;
si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID];
@@ -278,9 +278,10 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID] +
NM_I(sbi)->nid_cnt[PREALLOC_NID]) *
sizeof(struct free_nid);
- si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry);
- si->cache_mem += NM_I(sbi)->dirty_nat_cnt *
- sizeof(struct nat_entry_set);
+ si->cache_mem += NM_I(sbi)->nat_cnt[TOTAL_NAT] *
+ sizeof(struct nat_entry);
+ si->cache_mem += NM_I(sbi)->nat_cnt[DIRTY_NAT] *
+ sizeof(struct nat_entry_set);
si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages);
for (i = 0; i < MAX_INO_ENTRY; i++)
si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 8cba43eb0b24a..14ace0e21d389 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -896,6 +896,13 @@ enum nid_state {
MAX_NID_STATE,
};
+enum nat_state {
+ TOTAL_NAT,
+ DIRTY_NAT,
+ RECLAIMABLE_NAT,
+ MAX_NAT_STATE,
+};
+
struct f2fs_nm_info {
block_t nat_blkaddr; /* base disk address of NAT */
nid_t max_nid; /* maximum possible node ids */
@@ -911,8 +918,7 @@ struct f2fs_nm_info {
struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */
struct list_head nat_entries; /* cached nat entry list (clean) */
spinlock_t nat_list_lock; /* protect clean nat entry list */
- unsigned int nat_cnt; /* the # of cached nat entries */
- unsigned int dirty_nat_cnt; /* total num of nat entries in set */
+ unsigned int nat_cnt[MAX_NAT_STATE]; /* the # of cached nat entries */
unsigned int nat_blocks; /* # of nat blocks */
/* free node ids management */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index d5d8ce077f295..56505b8e2b8d3 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -62,8 +62,8 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type)
sizeof(struct free_nid)) >> PAGE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
} else if (type == NAT_ENTRIES) {
- mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >>
- PAGE_SHIFT;
+ mem_size = (nm_i->nat_cnt[TOTAL_NAT] *
+ sizeof(struct nat_entry)) >> PAGE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
if (excess_cached_nats(sbi))
res = false;
@@ -177,7 +177,8 @@ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i,
list_add_tail(&ne->list, &nm_i->nat_entries);
spin_unlock(&nm_i->nat_list_lock);
- nm_i->nat_cnt++;
+ nm_i->nat_cnt[TOTAL_NAT]++;
+ nm_i->nat_cnt[RECLAIMABLE_NAT]++;
return ne;
}
@@ -207,7 +208,8 @@ static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
{
radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
- nm_i->nat_cnt--;
+ nm_i->nat_cnt[TOTAL_NAT]--;
+ nm_i->nat_cnt[RECLAIMABLE_NAT]--;
__free_nat_entry(e);
}
@@ -253,7 +255,8 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
if (get_nat_flag(ne, IS_DIRTY))
goto refresh_list;
- nm_i->dirty_nat_cnt++;
+ nm_i->nat_cnt[DIRTY_NAT]++;
+ nm_i->nat_cnt[RECLAIMABLE_NAT]--;
set_nat_flag(ne, IS_DIRTY, true);
refresh_list:
spin_lock(&nm_i->nat_list_lock);
@@ -273,7 +276,8 @@ static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
set_nat_flag(ne, IS_DIRTY, false);
set->entry_cnt--;
- nm_i->dirty_nat_cnt--;
+ nm_i->nat_cnt[DIRTY_NAT]--;
+ nm_i->nat_cnt[RECLAIMABLE_NAT]++;
}
static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
@@ -2944,14 +2948,17 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
LIST_HEAD(sets);
int err = 0;
- /* during unmount, let's flush nat_bits before checking dirty_nat_cnt */
+ /*
+ * during unmount, let's flush nat_bits before checking
+ * nat_cnt[DIRTY_NAT].
+ */
if (enabled_nat_bits(sbi, cpc)) {
down_write(&nm_i->nat_tree_lock);
remove_nats_in_journal(sbi);
up_write(&nm_i->nat_tree_lock);
}
- if (!nm_i->dirty_nat_cnt)
+ if (!nm_i->nat_cnt[DIRTY_NAT])
return 0;
down_write(&nm_i->nat_tree_lock);
@@ -2962,7 +2969,8 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
* into nat entry set.
*/
if (enabled_nat_bits(sbi, cpc) ||
- !__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
+ !__has_cursum_space(journal,
+ nm_i->nat_cnt[DIRTY_NAT], NAT_JOURNAL))
remove_nats_in_journal(sbi);
while ((found = __gang_lookup_nat_set(nm_i,
@@ -3086,7 +3094,6 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
F2FS_RESERVED_NODE_NUM;
nm_i->nid_cnt[FREE_NID] = 0;
nm_i->nid_cnt[PREALLOC_NID] = 0;
- nm_i->nat_cnt = 0;
nm_i->ram_thresh = DEF_RAM_THRESHOLD;
nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD;
@@ -3220,7 +3227,7 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
__del_from_nat_cache(nm_i, natvec[idx]);
}
}
- f2fs_bug_on(sbi, nm_i->nat_cnt);
+ f2fs_bug_on(sbi, nm_i->nat_cnt[TOTAL_NAT]);
/* destroy nat set cache */
nid = 0;
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 69e5859e993cf..f84541b57acbb 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -126,13 +126,13 @@ static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne,
static inline bool excess_dirty_nats(struct f2fs_sb_info *sbi)
{
- return NM_I(sbi)->dirty_nat_cnt >= NM_I(sbi)->max_nid *
+ return NM_I(sbi)->nat_cnt[DIRTY_NAT] >= NM_I(sbi)->max_nid *
NM_I(sbi)->dirty_nats_ratio / 100;
}
static inline bool excess_cached_nats(struct f2fs_sb_info *sbi)
{
- return NM_I(sbi)->nat_cnt >= DEF_NAT_CACHE_THRESHOLD;
+ return NM_I(sbi)->nat_cnt[TOTAL_NAT] >= DEF_NAT_CACHE_THRESHOLD;
}
static inline bool excess_dirty_nodes(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index d66de5999a26d..dd3c3c7a90ec8 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -18,9 +18,7 @@ static unsigned int shrinker_run_no;
static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi)
{
- long count = NM_I(sbi)->nat_cnt - NM_I(sbi)->dirty_nat_cnt;
-
- return count > 0 ? count : 0;
+ return NM_I(sbi)->nat_cnt[RECLAIMABLE_NAT];
}
static unsigned long __count_free_nids(struct f2fs_sb_info *sbi)
--
2.27.0
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [f2fs-dev] [PATCH AUTOSEL 5.10 12/31] f2fs: fix race of pending_pages in decompression
[not found] <20201230130314.3636961-1-sashal@kernel.org>
2020-12-30 13:02 ` [f2fs-dev] [PATCH AUTOSEL 5.10 10/31] f2fs: Handle casefolding with Encryption Sasha Levin
2020-12-30 13:02 ` [f2fs-dev] [PATCH AUTOSEL 5.10 11/31] f2fs: avoid race condition for shrinker count Sasha Levin
@ 2020-12-30 13:02 ` Sasha Levin
2020-12-30 13:02 ` [f2fs-dev] [PATCH AUTOSEL 5.10 15/31] f2fs: fix shift-out-of-bounds in sanity_check_raw_super() Sasha Levin
3 siblings, 0 replies; 6+ messages in thread
From: Sasha Levin @ 2020-12-30 13:02 UTC (permalink / raw)
To: linux-kernel, stable
Cc: Sasha Levin, Jaegeuk Kim, Daeho Jeong, linux-f2fs-devel
From: Daeho Jeong <daehojeong@google.com>
[ Upstream commit 6422a71ef40e4751d59b8c9412e7e2dafe085878 ]
I found out f2fs_free_dic() is invoked in a wrong timing, but
f2fs_verify_bio() still needed the dic info and it triggered the
below kernel panic. It has been caused by the race condition of
pending_pages value between decompression and verity logic, when
the same compression cluster had been split in different bios.
By split bios, f2fs_verify_bio() ended up with decreasing
pending_pages value before it is reset to nr_cpages by
f2fs_decompress_pages() and caused the kernel panic.
[ 4416.564763] Unable to handle kernel NULL pointer dereference
at virtual address 0000000000000000
...
[ 4416.896016] Workqueue: fsverity_read_queue f2fs_verity_work
[ 4416.908515] pc : fsverity_verify_page+0x20/0x78
[ 4416.913721] lr : f2fs_verify_bio+0x11c/0x29c
[ 4416.913722] sp : ffffffc019533cd0
[ 4416.913723] x29: ffffffc019533cd0 x28: 0000000000000402
[ 4416.913724] x27: 0000000000000001 x26: 0000000000000100
[ 4416.913726] x25: 0000000000000001 x24: 0000000000000004
[ 4416.913727] x23: 0000000000001000 x22: 0000000000000000
[ 4416.913728] x21: 0000000000000000 x20: ffffffff2076f9c0
[ 4416.913729] x19: ffffffff2076f9c0 x18: ffffff8a32380c30
[ 4416.913731] x17: ffffffc01f966d97 x16: 0000000000000298
[ 4416.913732] x15: 0000000000000000 x14: 0000000000000000
[ 4416.913733] x13: f074faec89ffffff x12: 0000000000000000
[ 4416.913734] x11: 0000000000001000 x10: 0000000000001000
[ 4416.929176] x9 : ffffffff20d1f5c7 x8 : 0000000000000000
[ 4416.929178] x7 : 626d7464ff286b6b x6 : ffffffc019533ade
[ 4416.929179] x5 : 000000008049000e x4 : ffffffff2793e9e0
[ 4416.929180] x3 : 000000008049000e x2 : ffffff89ecfa74d0
[ 4416.929181] x1 : 0000000000000c40 x0 : ffffffff2076f9c0
[ 4416.929184] Call trace:
[ 4416.929187] fsverity_verify_page+0x20/0x78
[ 4416.929189] f2fs_verify_bio+0x11c/0x29c
[ 4416.929192] f2fs_verity_work+0x58/0x84
[ 4417.050667] process_one_work+0x270/0x47c
[ 4417.055354] worker_thread+0x27c/0x4d8
[ 4417.059784] kthread+0x13c/0x320
[ 4417.063693] ret_from_fork+0x10/0x18
Chao pointed this can happen by the below race condition.
Thread A f2fs_post_read_wq fsverity_wq
- f2fs_read_multi_pages()
- f2fs_alloc_dic
- dic->pending_pages = 2
- submit_bio()
- submit_bio()
- f2fs_post_read_work() handle first bio
- f2fs_decompress_work()
- __read_end_io()
- f2fs_decompress_pages()
- dic->pending_pages--
- enqueue f2fs_verity_work()
- f2fs_verity_work() handle first bio
- f2fs_verify_bio()
- dic->pending_pages--
- f2fs_post_read_work() handle second bio
- f2fs_decompress_work()
- enqueue f2fs_verity_work()
- f2fs_verify_pages()
- f2fs_free_dic()
- f2fs_verity_work() handle second bio
- f2fs_verfy_bio()
- use-after-free on dic
Signed-off-by: Daeho Jeong <daehojeong@google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
fs/f2fs/compress.c | 2 --
fs/f2fs/data.c | 58 +++++++++++++++++++++++++++++++++++++---------
fs/f2fs/f2fs.h | 1 +
3 files changed, 48 insertions(+), 13 deletions(-)
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 14262e0f1cd60..c5fee4d7ea72f 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -798,8 +798,6 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
if (cops->destroy_decompress_ctx)
cops->destroy_decompress_ctx(dic);
out_free_dic:
- if (verity)
- atomic_set(&dic->pending_pages, dic->nr_cpages);
if (!verity)
f2fs_decompress_end_io(dic->rpages, dic->cluster_size,
ret, false);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index be4da52604edc..b29243ee1c3e5 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -202,7 +202,7 @@ static void f2fs_verify_bio(struct bio *bio)
dic = (struct decompress_io_ctx *)page_private(page);
if (dic) {
- if (atomic_dec_return(&dic->pending_pages))
+ if (atomic_dec_return(&dic->verity_pages))
continue;
f2fs_verify_pages(dic->rpages,
dic->cluster_size);
@@ -1027,7 +1027,8 @@ static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx)
static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
unsigned nr_pages, unsigned op_flag,
- pgoff_t first_idx, bool for_write)
+ pgoff_t first_idx, bool for_write,
+ bool for_verity)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct bio *bio;
@@ -1049,7 +1050,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
post_read_steps |= 1 << STEP_DECRYPT;
if (f2fs_compressed_file(inode))
post_read_steps |= 1 << STEP_DECOMPRESS_NOWQ;
- if (f2fs_need_verity(inode, first_idx))
+ if (for_verity && f2fs_need_verity(inode, first_idx))
post_read_steps |= 1 << STEP_VERITY;
if (post_read_steps) {
@@ -1079,7 +1080,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page,
struct bio *bio;
bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
- page->index, for_write);
+ page->index, for_write, true);
if (IS_ERR(bio))
return PTR_ERR(bio);
@@ -2133,7 +2134,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page,
if (bio == NULL) {
bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
is_readahead ? REQ_RAHEAD : 0, page->index,
- false);
+ false, true);
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
bio = NULL;
@@ -2180,6 +2181,8 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
const unsigned blkbits = inode->i_blkbits;
const unsigned blocksize = 1 << blkbits;
struct decompress_io_ctx *dic = NULL;
+ struct bio_post_read_ctx *ctx;
+ bool for_verity = false;
int i;
int ret = 0;
@@ -2245,10 +2248,29 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
goto out_put_dnode;
}
+ /*
+ * It's possible to enable fsverity on the fly when handling a cluster,
+ * which requires complicated error handling. Instead of adding more
+ * complexity, let's give a rule where end_io post-processes fsverity
+ * per cluster. In order to do that, we need to submit bio, if previous
+ * bio sets a different post-process policy.
+ */
+ if (fsverity_active(cc->inode)) {
+ atomic_set(&dic->verity_pages, cc->nr_cpages);
+ for_verity = true;
+
+ if (bio) {
+ ctx = bio->bi_private;
+ if (!(ctx->enabled_steps & (1 << STEP_VERITY))) {
+ __submit_bio(sbi, bio, DATA);
+ bio = NULL;
+ }
+ }
+ }
+
for (i = 0; i < dic->nr_cpages; i++) {
struct page *page = dic->cpages[i];
block_t blkaddr;
- struct bio_post_read_ctx *ctx;
blkaddr = data_blkaddr(dn.inode, dn.node_page,
dn.ofs_in_node + i + 1);
@@ -2264,17 +2286,31 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
if (!bio) {
bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
is_readahead ? REQ_RAHEAD : 0,
- page->index, for_write);
+ page->index, for_write, for_verity);
if (IS_ERR(bio)) {
+ unsigned int remained = dic->nr_cpages - i;
+ bool release = false;
+
ret = PTR_ERR(bio);
dic->failed = true;
- if (!atomic_sub_return(dic->nr_cpages - i,
- &dic->pending_pages)) {
+
+ if (for_verity) {
+ if (!atomic_sub_return(remained,
+ &dic->verity_pages))
+ release = true;
+ } else {
+ if (!atomic_sub_return(remained,
+ &dic->pending_pages))
+ release = true;
+ }
+
+ if (release) {
f2fs_decompress_end_io(dic->rpages,
- cc->cluster_size, true,
- false);
+ cc->cluster_size, true,
+ false);
f2fs_free_dic(dic);
}
+
f2fs_put_dnode(&dn);
*bio_ret = NULL;
return ret;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 14ace0e21d389..72a25387f49bc 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1412,6 +1412,7 @@ struct decompress_io_ctx {
size_t rlen; /* valid data length in rbuf */
size_t clen; /* valid data length in cbuf */
atomic_t pending_pages; /* in-flight compressed page count */
+ atomic_t verity_pages; /* in-flight page count for verity */
bool failed; /* indicate IO error during decompression */
void *private; /* payload buffer for specified decompression algorithm */
void *private2; /* extra payload buffer */
--
2.27.0
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [f2fs-dev] [PATCH AUTOSEL 5.10 15/31] f2fs: fix shift-out-of-bounds in sanity_check_raw_super()
[not found] <20201230130314.3636961-1-sashal@kernel.org>
` (2 preceding siblings ...)
2020-12-30 13:02 ` [f2fs-dev] [PATCH AUTOSEL 5.10 12/31] f2fs: fix race of pending_pages in decompression Sasha Levin
@ 2020-12-30 13:02 ` Sasha Levin
3 siblings, 0 replies; 6+ messages in thread
From: Sasha Levin @ 2020-12-30 13:02 UTC (permalink / raw)
To: linux-kernel, stable
Cc: Sasha Levin, Anant Thazhemadam, syzbot+ca9a785f8ac472085994,
linux-f2fs-devel, Jaegeuk Kim
From: Chao Yu <yuchao0@huawei.com>
[ Upstream commit e584bbe821229a3e7cc409eecd51df66f9268c21 ]
syzbot reported a bug which could cause shift-out-of-bounds issue,
fix it.
Call Trace:
__dump_stack lib/dump_stack.c:79 [inline]
dump_stack+0x107/0x163 lib/dump_stack.c:120
ubsan_epilogue+0xb/0x5a lib/ubsan.c:148
__ubsan_handle_shift_out_of_bounds.cold+0xb1/0x181 lib/ubsan.c:395
sanity_check_raw_super fs/f2fs/super.c:2812 [inline]
read_raw_super_block fs/f2fs/super.c:3267 [inline]
f2fs_fill_super.cold+0x16c9/0x16f6 fs/f2fs/super.c:3519
mount_bdev+0x34d/0x410 fs/super.c:1366
legacy_get_tree+0x105/0x220 fs/fs_context.c:592
vfs_get_tree+0x89/0x2f0 fs/super.c:1496
do_new_mount fs/namespace.c:2896 [inline]
path_mount+0x12ae/0x1e70 fs/namespace.c:3227
do_mount fs/namespace.c:3240 [inline]
__do_sys_mount fs/namespace.c:3448 [inline]
__se_sys_mount fs/namespace.c:3425 [inline]
__x64_sys_mount+0x27f/0x300 fs/namespace.c:3425
do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
entry_SYSCALL_64_after_hwframe+0x44/0xa9
Reported-by: syzbot+ca9a785f8ac472085994@syzkaller.appspotmail.com
Signed-off-by: Anant Thazhemadam <anant.thazhemadam@gmail.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
fs/f2fs/super.c | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index ba859eeca69a3..822ae8c8fa39a 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2744,7 +2744,6 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
block_t total_sections, blocks_per_seg;
struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
(bh->b_data + F2FS_SUPER_OFFSET);
- unsigned int blocksize;
size_t crc_offset = 0;
__u32 crc = 0;
@@ -2778,10 +2777,10 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
}
/* Currently, support only 4KB block size */
- blocksize = 1 << le32_to_cpu(raw_super->log_blocksize);
- if (blocksize != F2FS_BLKSIZE) {
- f2fs_info(sbi, "Invalid blocksize (%u), supports only 4KB",
- blocksize);
+ if (le32_to_cpu(raw_super->log_blocksize) != F2FS_BLKSIZE_BITS) {
+ f2fs_info(sbi, "Invalid log_blocksize (%u), supports only %u",
+ le32_to_cpu(raw_super->log_blocksize),
+ F2FS_BLKSIZE_BITS);
return -EFSCORRUPTED;
}
--
2.27.0
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [f2fs-dev] [PATCH AUTOSEL 5.10 10/31] f2fs: Handle casefolding with Encryption
2020-12-30 13:02 ` [f2fs-dev] [PATCH AUTOSEL 5.10 10/31] f2fs: Handle casefolding with Encryption Sasha Levin
@ 2020-12-30 18:01 ` Eric Biggers
2021-01-04 14:20 ` Sasha Levin
0 siblings, 1 reply; 6+ messages in thread
From: Eric Biggers @ 2020-12-30 18:01 UTC (permalink / raw)
To: Sasha Levin
Cc: Jaegeuk Kim, Daniel Rosenberg, linux-kernel, stable,
linux-f2fs-devel
On Wed, Dec 30, 2020 at 08:02:52AM -0500, Sasha Levin wrote:
> From: Daniel Rosenberg <drosen@google.com>
>
> [ Upstream commit 7ad08a58bf67594057362e45cbddd3e27e53e557 ]
>
> Expand f2fs's casefolding support to include encrypted directories. To
> index casefolded+encrypted directories, we use the SipHash of the
> casefolded name, keyed by a key derived from the directory's fscrypt
> master key. This ensures that the dirhash doesn't leak information
> about the plaintext filenames.
>
> Encryption keys are unavailable during roll-forward recovery, so we
> can't compute the dirhash when recovering a new dentry in an encrypted +
> casefolded directory. To avoid having to force a checkpoint when a new
> file is fsync'ed, store the dirhash on-disk appended to i_name.
>
> This patch incorporates work by Eric Biggers <ebiggers@google.com>
> and Jaegeuk Kim <jaegeuk@kernel.org>.
>
> Co-developed-by: Eric Biggers <ebiggers@google.com>
> Signed-off-by: Eric Biggers <ebiggers@google.com>
> Signed-off-by: Daniel Rosenberg <drosen@google.com>
> Reviewed-by: Eric Biggers <ebiggers@google.com>
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
> Signed-off-by: Sasha Levin <sashal@kernel.org>
Please don't backport this to the LTS kernels. This is a new feature, not a
fix, and you missed prerequisite patches...
- Eric
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [f2fs-dev] [PATCH AUTOSEL 5.10 10/31] f2fs: Handle casefolding with Encryption
2020-12-30 18:01 ` Eric Biggers
@ 2021-01-04 14:20 ` Sasha Levin
0 siblings, 0 replies; 6+ messages in thread
From: Sasha Levin @ 2021-01-04 14:20 UTC (permalink / raw)
To: Eric Biggers
Cc: Jaegeuk Kim, Daniel Rosenberg, linux-kernel, stable,
linux-f2fs-devel
On Wed, Dec 30, 2020 at 10:01:56AM -0800, Eric Biggers wrote:
>On Wed, Dec 30, 2020 at 08:02:52AM -0500, Sasha Levin wrote:
>> From: Daniel Rosenberg <drosen@google.com>
>>
>> [ Upstream commit 7ad08a58bf67594057362e45cbddd3e27e53e557 ]
>>
>> Expand f2fs's casefolding support to include encrypted directories. To
>> index casefolded+encrypted directories, we use the SipHash of the
>> casefolded name, keyed by a key derived from the directory's fscrypt
>> master key. This ensures that the dirhash doesn't leak information
>> about the plaintext filenames.
>>
>> Encryption keys are unavailable during roll-forward recovery, so we
>> can't compute the dirhash when recovering a new dentry in an encrypted +
>> casefolded directory. To avoid having to force a checkpoint when a new
>> file is fsync'ed, store the dirhash on-disk appended to i_name.
>>
>> This patch incorporates work by Eric Biggers <ebiggers@google.com>
>> and Jaegeuk Kim <jaegeuk@kernel.org>.
>>
>> Co-developed-by: Eric Biggers <ebiggers@google.com>
>> Signed-off-by: Eric Biggers <ebiggers@google.com>
>> Signed-off-by: Daniel Rosenberg <drosen@google.com>
>> Reviewed-by: Eric Biggers <ebiggers@google.com>
>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
>> Signed-off-by: Sasha Levin <sashal@kernel.org>
>
>Please don't backport this to the LTS kernels. This is a new feature, not a
>fix, and you missed prerequisite patches...
Sure, I'l drop it. Thanks!
--
Thanks,
Sasha
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2021-01-04 14:21 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <20201230130314.3636961-1-sashal@kernel.org>
2020-12-30 13:02 ` [f2fs-dev] [PATCH AUTOSEL 5.10 10/31] f2fs: Handle casefolding with Encryption Sasha Levin
2020-12-30 18:01 ` Eric Biggers
2021-01-04 14:20 ` Sasha Levin
2020-12-30 13:02 ` [f2fs-dev] [PATCH AUTOSEL 5.10 11/31] f2fs: avoid race condition for shrinker count Sasha Levin
2020-12-30 13:02 ` [f2fs-dev] [PATCH AUTOSEL 5.10 12/31] f2fs: fix race of pending_pages in decompression Sasha Levin
2020-12-30 13:02 ` [f2fs-dev] [PATCH AUTOSEL 5.10 15/31] f2fs: fix shift-out-of-bounds in sanity_check_raw_super() Sasha Levin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).