From: "J. R. Okajima" <hooanon05g@gmail.com>
To: linux-fsdevel@vger.kernel.org, dchinner@redhat.com,
viro@zeniv.linux.org.uk, Eric Dumazet <edumazet@google.com>,
Hugh Dickins <hughd@google.com>, Christoph Hellwig <hch@lst.de>,
Andreas Dilger <adilger@dilger.ca>, Jan Kara <jack@suse.cz>
Subject: [RFC PATCH v4 1/2] tmpfs: manage the inode-number by IDR, signed int inum
Date: Thu, 5 Jun 2014 21:27:53 +0900 [thread overview]
Message-ID: <1401971274-8075-2-git-send-email-hooanon05g@gmail.com> (raw)
In-Reply-To: <1401971274-8075-1-git-send-email-hooanon05g@gmail.com>
To ensure the uniqueness of the inode-number, manage it by IDR.
Also it tries using the lowest unused inode-number, so the value will
usually be smaller.
Another side effect is the type of the inode-number in tmpfs. By using
IDR, it is limited to signed int. But I don't think it is a big
problem. INT_MAX is big enough for the number of inodes in a single tmpfs.
Comparison of performance:
- test program: see below
- version: 3.15.0-rc7
- before this commit
1 procs, 1048575/1048575 file, do unlink, 43.023 secs (usr 1.029 + sys 40.981)
2 procs, 1048574/1048574 file, do unlink, 24.047 secs (usr 1.048 + sys 45.886)
1 procs, 524286/524286 file, do unlink, 21.476 secs (usr 0.529 + sys 20.441)
2 procs, 524286/524286 file, do unlink, 12.029 secs (usr 0.554 + sys 22.880)
1 procs, 32766/32766 file, do unlink, 1.345 secs (usr 0.035 + sys 1.279)
2 procs, 32766/32766 file, do unlink, 0.753 secs (usr 0.030 + sys 1.439)
- after this commit
1 procs, 1048575/1048575 file, do unlink, 45.178 secs (usr 1.183 + sys 43.005)
2 procs, 1048574/1048574 file, do unlink, 25.328 secs (usr 1.126 + sys 48.481)
1 procs, 524286/524286 file, do unlink, 22.668 secs (usr 0.367 + sys 21.806)
2 procs, 524286/524286 file, do unlink, 12.639 secs (usr 0.591 + sys 24.137)
1 procs, 32766/32766 file, do unlink, 1.414 secs (usr 0.028 + sys 1.356)
2 procs, 32766/32766 file, do unlink, 0.787 secs (usr 0.036 + sys 1.500)
The overhead certainly exists, but it looks to be around 5% or less.
Test programs:
------- tmpfs-idr.sh -------
#!/bin/sh
set -eu
# f dir [mount-opts...]
#
# For every thread count from 1 up to the number of online processors:
# mount a tmpfs on dir with the given mount options, ask stat(1) how many
# free inodes the fresh mount reports, run /tmp/tmpfs-idr on it with that
# thread count and inode budget, then unmount it again.
#
# All expansions are quoted so that a directory name or mount option
# containing whitespace or glob characters is passed through unchanged.
f() # dir [opts]
{
	local dir=$1
	shift
	seq "$(getconf _NPROCESSORS_ONLN)" |
	while read -r ncpu
	do
		# "seq 1" yields the single value 1, so only the do_unlink=1
		# case is exercised.
		seq 1 |
		while read -r do_unlink
		do
			sudo mount -v -t tmpfs "$@" tmpfs "$dir"
			#stat -f $dir
			free_inodes=$(stat -f -c %d "$dir")
			/tmp/tmpfs-idr "$dir" "$ncpu" "$free_inodes" "$do_unlink"
			sudo umount "$dir"
		done
	done
}
# Scratch mount point, made unique per run with the shell's PID.
dir=/tmp/tmpfs-$$
mkdir $dir
# Record the kernel version and memory situation next to the results.
uname -a
free -m
# Larger inode limits, currently disabled.
#f $dir -o size=50%,nr_inodes=$((0x7fffffff))
#f $dir -o size=50%,nr_inodes=$((0x07ffffff))
#f $dir -o size=50%,nr_inodes=$((0x007fffff))
# Run the benchmark with three different nr_inodes limits.
f $dir -o size=50%,nr_inodes=$((0x00100000))
f $dir -o size=50%,nr_inodes=$((0x0007ffff))
f $dir -o size=50%,nr_inodes=$((0x00007fff))
# Remove the scratch mount point.
rm -fr $dir
------- tmpfs-idr.c -------
#define _GNU_SOURCE
#include <pthread.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
#ifndef O_PATH
#define O_PATH 010000000
#endif
/* Barrier the worker threads and the main thread wait on before the
 * file-creation loops start. */
pthread_barrier_t barrier;
/*
 * rootfd:    descriptor of the directory given as argv[1]
 * nproc:     number of worker threads (argv[2])
 * nfile:     number of files each worker tries to create
 *            (argv[3] divided by nproc)
 * do_unlink: when non-zero, each file is unlinked right after creation
 */
int rootfd, nproc, nfile, do_unlink;
/*
 * Parse a non-negative integer command-line argument.
 *
 * The string is converted with strtol() using base 0, so decimal, octal
 * ("0" prefix) and hexadecimal ("0x" prefix) notations are accepted.
 *
 * Invalid input aborts the program through assert(): an empty or
 * non-numeric string, trailing characters after the number, a value
 * outside the range of long, a negative value, or a value that does not
 * fit in the int return type.
 */
static int argton(char *s)
{
	char *end;
	long l;

	errno = 0;
	l = strtol(s, &end, 0);
	assert(!((l == LONG_MIN || l == LONG_MAX)
		 && errno));
	/* reject "no digits at all" and garbage following the number */
	assert(end != s && *end == '\0');
	assert(l >= 0);
	/* the result is narrowed to int; refuse silent truncation */
	assert(l <= INT_MAX);
	return (int)l;
}
/*
 * Worker thread body.
 *
 * arg carries the worker's numeric id, cast through long.  The worker
 * creates a sub-directory named after its id below rootfd, opens it, and
 * waits on the global barrier.  After the barrier it creates up to nfile
 * files named "0", "1", ... inside that directory, unlinking each one
 * right after creation when do_unlink is set, and stops at the first
 * creation failure.
 *
 * Returns the number of files successfully created, cast to void *.
 */
void *f(void *arg)
{
	int err, dirfd, fd, i;
	char a[16];
	int id = (long)arg;

	snprintf(a, sizeof(a), "%d", id);
	err = mkdirat(rootfd, a, 0755);
	assert(!err);
	dirfd = openat(rootfd, a, O_RDONLY | O_PATH);
	assert(dirfd >= 0);

	err = pthread_barrier_wait(&barrier);
	assert(!err || err == PTHREAD_BARRIER_SERIAL_THREAD);

	for (i = 0; i < nfile; i++) {
		snprintf(a, sizeof(a), "%d", i);
		/*
		 * O_CREAT requires the mode argument; without it the new
		 * file's permission bits are taken from an indeterminate
		 * value.
		 */
		fd = openat(dirfd, a, O_CREAT | O_WRONLY, 0644);
		if (fd < 0)
			break;
		if (do_unlink)
			unlinkat(dirfd, a, /*flags*/ 0);
		close(fd);
	}

	/* do not leak the per-worker directory descriptor */
	close(dirfd);
	return (void *)(long)i;
}
/* One measurement sample, as filled in by perf(). */
struct perf {
struct timespec ts;	/* CLOCK_MONOTONIC timestamp */
struct rusage ru;	/* RUSAGE_SELF resource usage */
};
/*
 * Take a measurement sample: store the current CLOCK_MONOTONIC time in
 * perf->ts and the RUSAGE_SELF resource usage in perf->ru.
 * The return values of both calls are not checked.
 */
void perf(struct perf *perf)
{
clock_gettime(CLOCK_MONOTONIC, &perf->ts);
getrusage(RUSAGE_SELF, &perf->ru);
}
/*
 * Store the difference a - b in *ans.
 *
 * When the nanosecond difference is negative, one second is borrowed so
 * that ans->tv_nsec is brought back up by 1000000000.
 */
void ts_subtract(struct timespec *ans, struct timespec *a, struct timespec *b)
{
	time_t sec = a->tv_sec - b->tv_sec;
	long nsec = a->tv_nsec - b->tv_nsec;

	if (nsec < 0) {
		/* borrow one second */
		nsec += 1000000000;
		sec -= 1;
	}

	ans->tv_sec = sec;
	ans->tv_nsec = nsec;
}
/*
 * Store the difference a - b in *ans.
 *
 * When the microsecond difference is negative, one second is borrowed so
 * that ans->tv_usec is brought back up by 1000000.
 */
void tv_subtract(struct timeval *ans, struct timeval *a, struct timeval *b)
{
	time_t sec = a->tv_sec - b->tv_sec;
	long usec = a->tv_usec - b->tv_usec;

	if (usec < 0) {
		/* borrow one second */
		usec += 1000000;
		sec -= 1;
	}

	ans->tv_sec = sec;
	ans->tv_usec = usec;
}
#define MAX_NPROC 16
/*
 * Run one benchmark round and print a one-line summary.
 *
 * Starts nproc worker threads running f(), samples time and resource
 * usage, releases the workers through the global barrier, joins them and
 * samples again.  The printed line shows the thread count, the number of
 * files actually created versus the number requested, whether unlinking
 * was enabled, and the elapsed, user and system time between the two
 * samples.
 *
 * Any failing pthread call aborts the program through assert().
 */
void run(void)
{
	int err, i, n;
	struct {
		pthread_t th;
		void *p;	/* worker's return value: files created */
	} b[MAX_NPROC];
	struct perf s[3];	/* [0] start, [1] end, [2] difference */

	/* b[] has room for MAX_NPROC workers only */
	assert(nproc >= 0 && nproc <= MAX_NPROC);

	/* the nproc workers plus this thread meet at the barrier */
	err = pthread_barrier_init(&barrier, NULL, nproc + 1);
	assert(!err);
	for (i = 0; i < nproc; i++) {
		err = pthread_create(&b[i].th, NULL, f, (void *)(long)i);
		assert(!err);
	}

	perf(s + 0);
	err = pthread_barrier_wait(&barrier);
	assert(!err || err == PTHREAD_BARRIER_SERIAL_THREAD);
	for (i = 0; i < nproc; i++) {
		/* a failed join would leave b[i].p uninitialized */
		err = pthread_join(b[i].th, &b[i].p);
		assert(!err);
	}
	perf(s + 1);

	err = pthread_barrier_destroy(&barrier);
	assert(!err);

	n = 0;
	for (i = 0; i < nproc; i++)
		n += (long)b[i].p;

	ts_subtract(&s[2].ts, &s[1].ts, &s[0].ts);
	tv_subtract(&s[2].ru.ru_utime, &s[1].ru.ru_utime, &s[0].ru.ru_utime);
	tv_subtract(&s[2].ru.ru_stime, &s[1].ru.ru_stime, &s[0].ru.ru_stime);

	/* cast explicitly so every argument matches its %ld conversion */
	printf("%d procs, %d/%d file, %s unlink, %ld.%03ld secs"
	       " (usr %ld.%03ld + sys %ld.%03ld)\n",
	       nproc, n, nfile * nproc, do_unlink ? "do" : "no",
	       (long)s[2].ts.tv_sec, (long)(s[2].ts.tv_nsec / 1000000),
	       (long)s[2].ru.ru_utime.tv_sec,
	       (long)(s[2].ru.ru_utime.tv_usec / 1000),
	       (long)s[2].ru.ru_stime.tv_sec,
	       (long)(s[2].ru.ru_stime.tv_usec / 1000));
}
/*
 * Usage: tmpfs-idr dir nproc nfile do_unlink
 *
 *   dir        directory to run the test in
 *   nproc      number of worker threads, 1 .. MAX_NPROC - 1
 *   nfile      total number of files to create; split evenly between
 *              the workers
 *   do_unlink  non-zero to unlink every file right after creating it
 *
 * Returns 0 after a completed run and 1 when too few arguments are
 * given.  Invalid argument values abort through assert().
 */
int main(int argc, char *argv[])
{
	if (argc < 5) {
		fprintf(stderr, "Usage: %s dir nproc nfile do_unlink\n",
			argc > 0 ? argv[0] : "tmpfs-idr");
		return 1;
	}

	rootfd = open(argv[1], O_RDONLY | O_PATH);
	assert(rootfd >= 0);
	nproc = argton(argv[2]);
	/* nproc is used as a divisor below, so zero must be rejected */
	assert(nproc > 0);
	assert(nproc < MAX_NPROC);
	nfile = argton(argv[3]);
	nfile /= nproc;
	do_unlink = argton(argv[4]);
	run();
	return 0;
}
/*
* Local variables: ;
* compile-command: "gcc -g -Wall -UNDEBUG -pthread -o /tmp/tmpfs-idr tmpfs-idr.c -lrt";
* End: ;
*/
----------------------------------------
Cc: Eric Dumazet <edumazet@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Andreas Dilger <adilger@dilger.ca>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: J. R. Okajima <hooanon05g@gmail.com>
---
include/linux/shmem_fs.h | 6 ++++--
mm/shmem.c | 37 +++++++++++++++++++++++++++++++------
2 files changed, 35 insertions(+), 8 deletions(-)
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 4d1771c..4ba8b43 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -24,10 +24,12 @@ struct shmem_inode_info {
};
struct shmem_sb_info {
+ struct mutex idr_lock;
+ struct idr idr; /* manages inode-number */
unsigned long max_blocks; /* How many blocks are allowed */
struct percpu_counter used_blocks; /* How many are allocated */
- unsigned long max_inodes; /* How many inodes are allowed */
- unsigned long free_inodes; /* How many are left for allocation */
+ int max_inodes; /* How many inodes are allowed */
+ int free_inodes; /* How many are left for allocation */
spinlock_t stat_lock; /* Serialize shmem_sb_info changes */
kuid_t uid; /* Mount uid for root directory */
kgid_t gid; /* Mount gid for root directory */
diff --git a/mm/shmem.c b/mm/shmem.c
index 368f314..3ac613d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -107,9 +107,13 @@ static unsigned long shmem_default_max_blocks(void)
return totalram_pages / 2;
}
-static unsigned long shmem_default_max_inodes(void)
+static int shmem_default_max_inodes(void)
{
- return min(totalram_pages - totalhigh_pages, totalram_pages / 2);
+ unsigned long ul;
+
+ ul = INT_MAX;
+ ul = min3(ul, totalram_pages - totalhigh_pages, totalram_pages / 2);
+ return ul;
}
#endif
@@ -569,6 +573,7 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
static void shmem_evict_inode(struct inode *inode)
{
struct shmem_inode_info *info = SHMEM_I(inode);
+ struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
if (inode->i_mapping->a_ops == &shmem_aops) {
shmem_unacct_size(info->flags, inode->i_size);
@@ -584,6 +589,11 @@ static void shmem_evict_inode(struct inode *inode)
simple_xattrs_free(&info->xattrs);
WARN_ON(inode->i_blocks);
+ if (inode->i_ino) {
+ mutex_lock(&sbinfo->idr_lock);
+ idr_remove(&sbinfo->idr, inode->i_ino);
+ mutex_unlock(&sbinfo->idr_lock);
+ }
shmem_free_inode(inode->i_sb);
clear_inode(inode);
}
@@ -1315,13 +1325,13 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
struct inode *inode;
struct shmem_inode_info *info;
struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
+ int ino;
if (shmem_reserve_inode(sb))
return NULL;
inode = new_inode(sb);
if (inode) {
- inode->i_ino = get_next_ino();
inode_init_owner(inode, dir, mode);
inode->i_blocks = 0;
inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
@@ -1362,6 +1372,18 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
mpol_shared_policy_init(&info->policy, NULL);
break;
}
+
+ /* inum 0 and 1 are unused */
+ mutex_lock(&sbinfo->idr_lock);
+ ino = idr_alloc(&sbinfo->idr, inode, 2, INT_MAX, GFP_NOFS);
+ if (ino > 0) {
+ inode->i_ino = ino;
+ mutex_unlock(&sbinfo->idr_lock);
+ } else {
+ mutex_unlock(&sbinfo->idr_lock);
+ iput(inode); /* shmem_free_inode() will be called */
+ inode = NULL;
+ }
} else
shmem_free_inode(sb);
return inode;
@@ -2385,7 +2407,7 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
goto bad_val;
} else if (!strcmp(this_char,"nr_inodes")) {
sbinfo->max_inodes = memparse(value, &rest);
- if (*rest)
+ if (*rest || sbinfo->max_inodes < 2)
goto bad_val;
} else if (!strcmp(this_char,"mode")) {
if (remount)
@@ -2438,7 +2460,7 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
{
struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
struct shmem_sb_info config = *sbinfo;
- unsigned long inodes;
+ int inodes;
int error = -EINVAL;
config.mpol = NULL;
@@ -2486,7 +2508,7 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
seq_printf(seq, ",size=%luk",
sbinfo->max_blocks << (PAGE_CACHE_SHIFT - 10));
if (sbinfo->max_inodes != shmem_default_max_inodes())
- seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes);
+ seq_printf(seq, ",nr_inodes=%d", sbinfo->max_inodes);
if (sbinfo->mode != (S_IRWXUGO | S_ISVTX))
seq_printf(seq, ",mode=%03ho", sbinfo->mode);
if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID))
@@ -2504,6 +2526,7 @@ static void shmem_put_super(struct super_block *sb)
{
struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
+ idr_destroy(&sbinfo->idr);
percpu_counter_destroy(&sbinfo->used_blocks);
mpol_put(sbinfo->mpol);
kfree(sbinfo);
@@ -2522,6 +2545,8 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
if (!sbinfo)
return -ENOMEM;
+ mutex_init(&sbinfo->idr_lock);
+ idr_init(&sbinfo->idr);
sbinfo->mode = S_IRWXUGO | S_ISVTX;
sbinfo->uid = current_fsuid();
sbinfo->gid = current_fsgid();
--
1.7.10.4
next prev parent reply other threads:[~2014-06-05 12:27 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-05-21 18:48 [RFC 0/3] vfs: get_next_ino(), never inum=0 and uniqueness hooanon05g
2014-05-21 18:48 ` [RFC 1/3] vfs: get_next_ino(), never inum=0 hooanon05g
2014-05-28 4:28 ` Hugh Dickins
2014-05-28 5:53 ` Eric Dumazet
2014-05-21 18:48 ` [RFC 2/3] vfs: get_next_ino(), support for the uniqueness hooanon05g
2014-05-22 11:56 ` Jan Kara
2014-05-22 15:03 ` J. R. Okajima
2014-05-22 15:12 ` Jan Kara
2014-05-22 15:14 ` Christoph Hellwig
2014-05-22 16:06 ` Jan Kara
2014-05-29 15:46 ` [PATCH v2 1/2] tmpfs: manage the inode-number by IDR J. R. Okajima
2014-05-29 15:46 ` [PATCH v2 2/2] tmpfs: refine a file handle for NFS-exporting J. R. Okajima
2014-05-31 2:43 ` [PATCH v2 1/2] tmpfs: manage the inode-number by IDR J. R. Okajima
2014-06-01 16:18 ` [RFC PATCH v3 0/2] the uniquness of tmpfs inode-number J. R. Okajima
2014-06-01 16:18 ` [RFC PATCH v3 1/2] tmpfs: manage the inode-number by IDR, signed int inum J. R. Okajima
2014-06-03 9:04 ` Jan Kara
2014-06-03 14:36 ` J. R. Okajima
2014-06-05 12:27 ` [RFC PATCH v4 0/2] tmpfs: manage the inode-number by IDR (performance measure) J. R. Okajima
2014-06-05 12:27 ` J. R. Okajima [this message]
2014-06-05 12:27 ` [RFC PATCH v4 2/2] tmpfs: refine a file handle for NFS-exporting J. R. Okajima
2014-06-01 16:18 ` [RFC PATCH v3 " J. R. Okajima
2014-05-21 18:49 ` [RFC 3/3] uniqueness of inode number, configfs, debugfs, procfs, ramfs and tmpfs hooanon05g
2014-05-22 1:03 ` J. R. Okajima
2014-05-22 11:53 ` Jan Kara
2014-05-22 14:58 ` J. R. Okajima
2014-05-22 15:09 ` Jan Kara
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1401971274-8075-2-git-send-email-hooanon05g@gmail.com \
--to=hooanon05g@gmail.com \
--cc=adilger@dilger.ca \
--cc=dchinner@redhat.com \
--cc=edumazet@google.com \
--cc=hch@lst.de \
--cc=hughd@google.com \
--cc=jack@suse.cz \
--cc=linux-fsdevel@vger.kernel.org \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).