* [PATCH 2/2] tmpfs: Make tmpfs scalable with caches for free blocks
From: tim @ 2010-05-18 23:34 UTC
To: linux-kernel; +Cc: Andi Kleen
The current implementation of tmpfs is not scalable: stat_lock must be
taken whenever a page is allocated or freed, leading to heavy lock
contention. This patch uses the qtoken library to maintain per-CPU
caches of free blocks, so pages can be obtained and returned without
acquiring stat_lock in the common case. It improves tmpfs performance
by 270% on the AIM7 fserver workload.
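
As a rough illustration of the idea (a userspace analogue with
hypothetical names, not the qtoken code itself), the allocation fast
path draws from a per-CPU cache and falls back to the locked common
pool only when that cache is empty:

/* Hypothetical userspace sketch of a per-CPU free-block cache.
 * Each thread uses its own slot of cache[], so the fast path needs
 * no lock at all; only the shared pool is protected by pool_lock. */
#include <pthread.h>
#include <stdbool.h>

#define NCPU     4
#define CACHE_SZ 512                  /* mirrors SHMEM_FREE_BLK_CACHE_SZ */

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long pool_free = 1024;   /* free blocks in the common pool */
static unsigned long cache[NCPU];        /* per-CPU cached free blocks     */

static bool block_get(int cpu)
{
	unsigned long grab;

	if (cache[cpu] > 0) {             /* fast path: no global lock */
		cache[cpu]--;
		return true;
	}
	pthread_mutex_lock(&pool_lock);   /* slow path: refill from pool */
	if (pool_free == 0) {
		pthread_mutex_unlock(&pool_lock);
		return false;
	}
	grab = pool_free < CACHE_SZ ? pool_free : CACHE_SZ;
	pool_free -= grab;
	cache[cpu] = grab - 1;            /* one block consumed right away */
	pthread_mutex_unlock(&pool_lock);
	return true;
}

With a cache of SHMEM_FREE_BLK_CACHE_SZ blocks per CPU, pool_lock
(standing in for stat_lock) is taken roughly once per 512 allocations
instead of on every one.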
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
include/linux/shmem_fs.h | 4 ++-
mm/shmem.c | 58 +++++++++++++++++++++++++--------------------
2 files changed, 35 insertions(+), 27 deletions(-)
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index e164291..6ba014d 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -3,10 +3,12 @@
#include <linux/swap.h>
#include <linux/mempolicy.h>
+#include <linux/qtoken.h>
/* inode in-kernel data */
#define SHMEM_NR_DIRECT 16
+#define SHMEM_FREE_BLK_CACHE_SZ 512
struct shmem_inode_info {
spinlock_t lock;
@@ -23,7 +25,7 @@ struct shmem_inode_info {
struct shmem_sb_info {
unsigned long max_blocks; /* How many blocks are allowed */
- unsigned long free_blocks; /* How many are left for allocation */
+ struct qtoken token_jar; /* Token jar of free blocks */
unsigned long max_inodes; /* How many inodes are allowed */
unsigned long free_inodes; /* How many are left for allocation */
spinlock_t stat_lock; /* Serialize shmem_sb_info changes */
diff --git a/mm/shmem.c b/mm/shmem.c
index eef4ebe..0ff3b73 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -29,6 +29,7 @@
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/swap.h>
+#include <linux/qtoken.h>
static struct vfsmount *shm_mnt;
@@ -233,10 +234,10 @@ static void shmem_free_blocks(struct inode *inode, long pages)
{
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
if (sbinfo->max_blocks) {
- spin_lock(&sbinfo->stat_lock);
- sbinfo->free_blocks += pages;
+ spin_lock(&inode->i_lock);
+ qtoken_return(&sbinfo->token_jar, pages);
inode->i_blocks -= pages*BLOCKS_PER_PAGE;
- spin_unlock(&sbinfo->stat_lock);
+ spin_unlock(&inode->i_lock);
}
}
@@ -416,19 +417,18 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
if (sgp == SGP_READ)
return shmem_swp_map(ZERO_PAGE(0));
/*
- * Test free_blocks against 1 not 0, since we have 1 data
- * page (and perhaps indirect index pages) yet to allocate:
+ * leave 1 pg in reserve in token jar, since we have
+ * 1 data pg (and perhaps indirect index pages) yet to allocate:
* a waste to allocate index if we cannot allocate data.
*/
if (sbinfo->max_blocks) {
- spin_lock(&sbinfo->stat_lock);
- if (sbinfo->free_blocks <= 1) {
- spin_unlock(&sbinfo->stat_lock);
+ spin_lock(&inode->i_lock);
+ if (qtoken_get(&sbinfo->token_jar, 1, 1) == 0) {
+ spin_unlock(&inode->i_lock);
return ERR_PTR(-ENOSPC);
}
- sbinfo->free_blocks--;
inode->i_blocks += BLOCKS_PER_PAGE;
- spin_unlock(&sbinfo->stat_lock);
+ spin_unlock(&inode->i_lock);
}
spin_unlock(&info->lock);
@@ -1385,17 +1385,20 @@ repeat:
shmem_swp_unmap(entry);
sbinfo = SHMEM_SB(inode->i_sb);
if (sbinfo->max_blocks) {
- spin_lock(&sbinfo->stat_lock);
- if (sbinfo->free_blocks == 0 ||
- shmem_acct_block(info->flags)) {
- spin_unlock(&sbinfo->stat_lock);
+ if (shmem_acct_block(info->flags)) {
+ spin_unlock(&info->lock);
+ error = -ENOSPC;
+ goto failed;
+ }
+ spin_lock(&inode->i_lock);
+ if (qtoken_get(&sbinfo->token_jar, 1, 0) == 0) {
+ spin_unlock(&inode->i_lock);
spin_unlock(&info->lock);
error = -ENOSPC;
goto failed;
}
- sbinfo->free_blocks--;
inode->i_blocks += BLOCKS_PER_PAGE;
- spin_unlock(&sbinfo->stat_lock);
+ spin_unlock(&inode->i_lock);
} else if (shmem_acct_block(info->flags)) {
spin_unlock(&info->lock);
error = -ENOSPC;
@@ -1794,7 +1797,7 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
spin_lock(&sbinfo->stat_lock);
if (sbinfo->max_blocks) {
buf->f_blocks = sbinfo->max_blocks;
- buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
+ buf->f_bavail = buf->f_bfree = qtoken_avail(&sbinfo->token_jar);
}
if (sbinfo->max_inodes) {
buf->f_files = sbinfo->max_inodes;
@@ -2250,7 +2253,6 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
{
struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
struct shmem_sb_info config = *sbinfo;
- unsigned long blocks;
unsigned long inodes;
int error = -EINVAL;
@@ -2258,12 +2260,6 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
return error;
spin_lock(&sbinfo->stat_lock);
- blocks = sbinfo->max_blocks - sbinfo->free_blocks;
- inodes = sbinfo->max_inodes - sbinfo->free_inodes;
- if (config.max_blocks < blocks)
- goto out;
- if (config.max_inodes < inodes)
- goto out;
/*
* Those tests also disallow limited->unlimited while any are in
* use, so i_blocks will always be zero when max_blocks is zero;
@@ -2274,10 +2270,14 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
goto out;
if (config.max_inodes && !sbinfo->max_inodes)
goto out;
+ inodes = sbinfo->max_inodes - sbinfo->free_inodes;
+ if (config.max_inodes < inodes)
+ goto out;
+ if (!qtoken_resize(&sbinfo->token_jar, config.max_blocks))
+ goto out;
error = 0;
sbinfo->max_blocks = config.max_blocks;
- sbinfo->free_blocks = config.max_blocks - blocks;
sbinfo->max_inodes = config.max_inodes;
sbinfo->free_inodes = config.max_inodes - inodes;
@@ -2310,6 +2310,10 @@ static int shmem_show_options(struct seq_file *seq, struct vfsmount *vfs)
static void shmem_put_super(struct super_block *sb)
{
+ struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
+
+ if (sbinfo)
+ qtoken_put(&sbinfo->token_jar);
kfree(sb->s_fs_info);
sb->s_fs_info = NULL;
}
@@ -2352,8 +2356,10 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
#endif
spin_lock_init(&sbinfo->stat_lock);
- sbinfo->free_blocks = sbinfo->max_blocks;
sbinfo->free_inodes = sbinfo->max_inodes;
+ if (!qtoken_init(&sbinfo->token_jar, sbinfo->max_blocks,
+ SHMEM_FREE_BLK_CACHE_SZ))
+ goto failed;
sb->s_maxbytes = SHMEM_MAX_BYTES;
sb->s_blocksize = PAGE_CACHE_SIZE;
^ permalink raw reply related [flat|nested] 3+ messages in thread* Re: [PATCH 2/2] tmpfs: Make tmpfs scalable with caches for free blocks
From: Andrew Morton @ 2010-05-20 23:13 UTC
To: tim; +Cc: linux-kernel, Andi Kleen, Hugh Dickins
On Tue, 18 May 2010 16:34:32 -0700
tim <tim.c.chen@linux.intel.com> wrote:
> The current implementation of tmpfs is not scalable: stat_lock must be
> taken whenever a page is allocated or freed, leading to heavy lock
> contention. This patch uses the qtoken library to maintain per-CPU
> caches of free blocks, so pages can be obtained and returned without
> acquiring stat_lock in the common case. It improves tmpfs performance
> by 270% on the AIM7 fserver workload.
>
> ...
>
> - spin_lock(&sbinfo->stat_lock);
> - sbinfo->free_blocks += pages;
> + spin_lock(&inode->i_lock);
> + qtoken_return(&sbinfo->token_jar, pages);
> inode->i_blocks -= pages*BLOCKS_PER_PAGE;
> - spin_unlock(&sbinfo->stat_lock);
> + spin_unlock(&inode->i_lock);
Well, most of the calls into the qtoken layer occur under inode->i_lock.
So did we really need that spinlock inside the qtoken library code?
It is a problem when library code such as qtoken performs its own
internal locking. We have learned that such code is much more useful
and flexible if it performs no locking at all, and requires that
callers provide the locking (lib/rbtree.c, lib/radix-tree.c,
lib/prio_heap.c, lib/flex_array.c, etcetera). Can we follow this
approach with qtoken?
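
For reference, a caller-locked variant along these lines might look
roughly like the following declaration-only sketch (hypothetical names
and interface, not the posted qtoken API): the library keeps no
spinlock of its own, and the caller serializes access to the shared
pool with whatever lock it already holds.

/* Hypothetical caller-locked token-jar interface, in the spirit of
 * lib/rbtree.c and lib/flex_array.c: no internal locking. */
#include <stdbool.h>

struct token_jar {
	unsigned long pool;        /* free tokens in the shared pool */
	unsigned long *cpu_cache;  /* per-CPU cached free tokens     */
	unsigned long cache_sz;    /* tokens kept in each CPU cache  */
};

/* Touches only the current CPU's cache entry; safe without a lock
 * as long as preemption is disabled. */
bool token_cache_get(struct token_jar *jar, int cpu);

/* Touch the shared pool: the caller must hold its own lock. */
bool token_pool_refill(struct token_jar *jar, int cpu);
unsigned long token_pool_avail(const struct token_jar *jar);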
* Re: [PATCH 2/2] tmpfs: Make tmpfs scalable with caches for free blocks
From: Tim Chen @ 2010-05-26 19:33 UTC
To: Andrew Morton; +Cc: linux-kernel, Andi Kleen, Hugh Dickins
On Thu, 2010-05-20 at 16:13 -0700, Andrew Morton wrote:
> >
> > - spin_lock(&sbinfo->stat_lock);
> > - sbinfo->free_blocks += pages;
> > + spin_lock(&inode->i_lock);
> > + qtoken_return(&sbinfo->token_jar, pages);
> > inode->i_blocks -= pages*BLOCKS_PER_PAGE;
> > - spin_unlock(&sbinfo->stat_lock);
> > + spin_unlock(&inode->i_lock);
>
> Well most of the calls into the qtoken layer occur under inode->i_lock.
> So did we really need that spinlock inside the qtoken library code?
>
> It is a problem when library code such as qtoken performs its own
> internal locking. We have learned that such code is much more useful
> and flexible if it performs no locking at all, and requires that
> callers provide the locking (lib/rbtree.c, lib/radix-tree.c,
> lib/prio_heap.c, lib/flex_array.c, etcetera). Can we follow this
> approach with qtoken?
>
Andrew,
The inode->i_lock only protects a single inode, while the token jar is
shared by all the inodes on the tmpfs mount, so for performance reasons
we do not want to use inode->i_lock to serialize the entire token jar.
In the qtoken scheme, the spinlock inside the qtoken library protects
only the free tokens in the common pool of the token jar. Most of the
time that lock need not be taken, because we can operate on the tokens
in the per-CPU cache of the jar; the lock is needed only when the cache
runs out of tokens. The library contains the logic to manage the cache
and to decide when the common pool has to be locked and accessed. It is
better to keep that locking decision inside the library than to expose
it to the caller; otherwise every caller would have to check whether
tokens should come from the cache or from the common pool, duplicating
the logic of the qtoken library.
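
Continuing the userspace analogue sketched under the patch description
(hypothetical names, not the posted qtoken code), the return path shows
the same pattern: blocks go back into this CPU's cache, and the pool
lock is taken only when the cache is already full and the excess must
spill back to the common pool.

/* Hypothetical userspace sketch of the return path: the pool lock
 * (standing in for the qtoken-internal lock) is taken only when the
 * per-CPU cache cannot absorb all of the returned blocks. */
static void block_return(int cpu, unsigned long n)
{
	unsigned long room = CACHE_SZ - cache[cpu];

	if (n <= room) {                  /* fast path: cache absorbs them */
		cache[cpu] += n;
		return;
	}
	cache[cpu] = CACHE_SZ;            /* fill the cache ...             */
	pthread_mutex_lock(&pool_lock);   /* ... spill the rest to the pool */
	pool_free += n - room;
	pthread_mutex_unlock(&pool_lock);
}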
Regards,
Tim Chen