From: Jan Kara <jack@suse.cz>
To: linux-ext4@vger.kernel.org
Cc: Jan Kara <jack@suse.cz>
Subject: [PATCH 3/3] ext4: Improve scalability of ext4 orphan file handling
Date: Thu, 16 Apr 2015 17:42:57 +0200 [thread overview]
Message-ID: <1429198977-5637-4-git-send-email-jack@suse.cz> (raw)
In-Reply-To: <1429198977-5637-1-git-send-email-jack@suse.cz>
Even though the length of the critical section when adding / removing
orphaned inodes was significantly reduced by using the orphan file,
contention on the lock protecting the orphan file still appears high in
profiles for truncate / unlink intensive workloads with a high number of
threads.
This patch makes handling of the orphan file completely lockless. Also, to
reduce conflicts between CPUs, different CPUs start searching for an empty
slot in the orphan file in different blocks.
Performance comparison of locked orphan file handling, lockless orphan
file handling, and completely disabled orphan inode handling
on a 48-CPU Xeon server with 32 GB of RAM, with the filesystem located on
a ramdisk (average of 5 runs):
stress-orphan (microbenchmark truncating files byte-by-byte from N
processes in parallel)
Threads Time Time Time
Orphan locked Orphan lockless No orphan
1 1.260000 1.379800 1.287000
2 2.455000 2.323800 2.314400
4 3.848400 3.704000 3.680400
8 6.833000 6.711800 6.844600
16 12.883200 12.931400 13.131600
32 25.342200 22.521600 22.570200
64 50.918400 36.380000 36.106200
128 102.666000 71.349800 71.716800
So we can see that with lockless orphan file handling, the addition /
deletion of orphaned inodes dropped completely out of the picture, even
for a microbenchmark stressing it.
The reaim new_fserver workload didn't show any gains / losses outside of
the error margin.
Signed-off-by: Jan Kara <jack@suse.cz>
---
fs/ext4/ext4.h | 2 +-
fs/ext4/namei.c | 40 ++++++++++++++++++++++++----------------
fs/ext4/super.c | 5 +++--
3 files changed, 28 insertions(+), 19 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 768a8b9ee2f9..89de44af986c 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1220,7 +1220,7 @@ static inline int ext4_inodes_per_orphan_block(struct super_block *sb)
}
struct ext4_orphan_block {
- int ob_free_entries; /* Number of free orphan entries in block */
+ atomic_t ob_free_entries; /* Number of free orphan entries in block */
struct buffer_head *ob_bh; /* Buffer for orphan block */
};
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 3436b7fa0ef9..0c0649441a0f 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2531,20 +2531,27 @@ static int empty_dir(struct inode *inode)
static int ext4_orphan_file_add(handle_t *handle, struct inode *inode)
{
- int i, j;
+ int i, j, start;
struct ext4_orphan_info *oi = &EXT4_SB(inode->i_sb)->s_orphan_info;
int ret = 0;
__le32 *bdata;
int inodes_per_ob = ext4_inodes_per_orphan_block(inode->i_sb);
- spin_lock(&oi->of_lock);
- for (i = 0; i < oi->of_blocks && !oi->of_binfo[i].ob_free_entries; i++);
- if (i == oi->of_blocks) {
- spin_unlock(&oi->of_lock);
+ /*
+ * Find block with free orphan entry. Use CPU number for a naive hash
+ * for a search start in the orphan file
+ */
+ start = raw_smp_processor_id()*13 % oi->of_blocks;
+ i = start;
+ do {
+ if (atomic_dec_if_positive(&oi->of_binfo[i].ob_free_entries)
+ >= 0)
+ break;
+ if (++i >= oi->of_blocks)
+ i = 0;
+ } while (i != start);
+ if (i == start)
return -ENOSPC;
- }
- oi->of_binfo[i].ob_free_entries--;
- spin_unlock(&oi->of_lock);
/*
* Get access to orphan block. We have dropped of_lock but since we
@@ -2557,14 +2564,17 @@ static int ext4_orphan_file_add(handle_t *handle, struct inode *inode)
return ret;
bdata = (__le32 *)(oi->of_binfo[i].ob_bh->b_data);
- spin_lock(&oi->of_lock);
/* Find empty slot in a block */
- for (j = 0; j < inodes_per_ob && bdata[j]; j++);
- BUG_ON(j == inodes_per_ob);
- bdata[j] = cpu_to_le32(inode->i_ino);
+ j = 0;
+ do {
+ while (bdata[j]) {
+ if (++j >= inodes_per_ob)
+ j = 0;
+ }
+ } while (cmpxchg(&bdata[j], 0, cpu_to_le32(inode->i_ino)) != 0);
+
EXT4_I(inode)->i_orphan_idx = i * inodes_per_ob + j;
ext4_set_inode_state(inode, EXT4_STATE_ORPHAN_FILE);
- spin_unlock(&oi->of_lock);
return ext4_handle_dirty_metadata(handle, NULL, oi->of_binfo[i].ob_bh);
}
@@ -2687,10 +2697,8 @@ static int ext4_orphan_file_del(handle_t *handle, struct inode *inode)
goto out;
bdata = (__le32 *)(oi->of_binfo[blk].ob_bh->b_data);
- spin_lock(&oi->of_lock);
bdata[off] = 0;
- oi->of_binfo[blk].ob_free_entries++;
- spin_unlock(&oi->of_lock);
+ atomic_inc(&oi->of_binfo[blk].ob_free_entries);
ret = ext4_handle_dirty_metadata(handle, NULL, oi->of_binfo[blk].ob_bh);
out:
ext4_clear_inode_state(inode, EXT4_STATE_ORPHAN_FILE);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 14c30a9ef509..4d2fcc8483b2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3542,7 +3542,7 @@ static int ext4_init_orphan_info(struct super_block *sb)
for (j = 0; j < inodes_per_ob; j++)
if (bdata[j] == 0)
free++;
- oi->of_binfo[i].ob_free_entries = free;
+ atomic_set(&oi->of_binfo[i].ob_free_entries, free);
}
iput(inode);
return 0;
@@ -4924,7 +4924,8 @@ static int ext4_orphan_file_empty(struct super_block *sb)
if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_ORPHAN_FILE))
return 1;
for (i = 0; i < oi->of_blocks; i++)
- if (oi->of_binfo[i].ob_free_entries != inodes_per_ob)
+ if (atomic_read(&oi->of_binfo[i].ob_free_entries) !=
+ inodes_per_ob)
return 0;
return 1;
}
--
2.1.4
prev parent reply other threads:[~2015-04-16 15:43 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-04-16 15:42 [PATCH 0/3 RFC] ext4: Speedup orphan file handling Jan Kara
2015-04-16 15:42 ` [PATCH 1/3] ext4: Support for checksumming from journal triggers Jan Kara
2015-04-17 19:00 ` Andreas Dilger
2015-04-20 9:07 ` Jan Kara
2015-04-16 15:42 ` [PATCH 2/3] ext4: Speedup ext4 orphan inode handling Jan Kara
[not found] ` <CAOQ4uxifVr1swHb5Y2M-TRuzwdDo-z92G6PuHvBGecGZ7nYuHg@mail.gmail.com>
2015-04-17 6:09 ` Amir Goldstein
2015-04-17 7:15 ` Jan Kara
2015-04-17 22:21 ` Andreas Dilger
2015-04-17 23:53 ` Andreas Dilger
2015-04-18 1:13 ` Darrick J. Wong
2015-04-20 12:34 ` Jan Kara
2015-04-20 12:25 ` Jan Kara
2015-04-20 16:35 ` Andreas Dilger
2015-04-21 10:56 ` Jan Kara
2015-04-21 15:46 ` Andreas Dilger
2015-04-18 23:53 ` Theodore Ts'o
2015-04-20 9:32 ` Jan Kara
2015-04-16 15:42 ` Jan Kara [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1429198977-5637-4-git-send-email-jack@suse.cz \
--to=jack@suse.cz \
--cc=linux-ext4@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).