From: "J. R. Okajima" <hooanon05g@gmail.com>
To: Christoph Hellwig <hch@lst.de>
Cc: dchinner@redhat.com, viro@zeniv.linux.org.uk,
linux-fsdevel@vger.kernel.org
Subject: Re: [PATCH] vfs: get_next_ino(), never inum=0
Date: Wed, 30 Apr 2014 13:08:54 +0900 [thread overview]
Message-ID: <7467.1398830934@jrobl> (raw)
In-Reply-To: <20140429175342.GA26109@lst.de>
Christoph Hellwig:
> If you care about really unique inode numbers you shouldn't use get_next_ino
> but something like an idr allocator. The default i_ino assigned in
> new_inode() from which get_next_ino was factored out was mostly intended
> for small synthetic filesystems with few enough inodes that it wouldn't
> wrap around.
Grep-ping get_next_ino, I got 30 calls in mainline.
For how many of them is get_next_ino() inappropriate? I don't know. But at
least for tmpfs, it is better to manage the inums itself, since tmpfs
must be one of the biggest consumers of inums and it is NFS-exportable.
Do you think we need a common function in VFS to manage inums per sb, or
is it totally up to the filesystem, making a common function unnecessary?
Instead of idr, I was thinking about a simple bitmap in tmpfs such like
this. It introduces a new mount option "ino" which forces tmpfs to
assign the lowest unused number for a new inode within the mounted
tmpfs. Without "ino" or specifying "noino", the behaviour is unchanged
(use vfs:get_next_ino()).
But it may not scale well, since the single spinlock is taken on every allocation.
J. R. Okajima
commit 214d38e8c34fb341fd0f37cc92614b5e93e0803b
Author: J. R. Okajima <hooanon05@yahoo.co.jp>
Date: Mon Sep 2 10:45:42 2013 +0900
shmem: management for inum
Signed-off-by: J. R. Okajima <hooanon05g@gmail.com>
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 4d1771c..39762e1 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -29,6 +29,8 @@ struct shmem_sb_info {
unsigned long max_inodes; /* How many inodes are allowed */
unsigned long free_inodes; /* How many are left for allocation */
spinlock_t stat_lock; /* Serialize shmem_sb_info changes */
+ spinlock_t ino_lock;
+ unsigned long *ino_bitmap;
kuid_t uid; /* Mount uid for root directory */
kgid_t gid; /* Mount gid for root directory */
umode_t mode; /* Mount mode for root directory */
diff --git a/mm/shmem.c b/mm/shmem.c
index 9f70e02..bc2c5e4 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -197,14 +197,61 @@ static int shmem_reserve_inode(struct super_block *sb)
return 0;
}
-static void shmem_free_inode(struct super_block *sb)
+static void shmem_free_inode(struct inode *inode)
{
+ struct super_block *sb = inode->i_sb;
struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
+
if (sbinfo->max_inodes) {
spin_lock(&sbinfo->stat_lock);
sbinfo->free_inodes++;
spin_unlock(&sbinfo->stat_lock);
}
+
+ if (!inode->i_nlink) {
+ spin_lock(&sbinfo->ino_lock);
+ if (sbinfo->ino_bitmap)
+ clear_bit(inode->i_ino - 2, sbinfo->ino_bitmap);
+ spin_unlock(&sbinfo->ino_lock);
+ }
+}
+
+/*
+ * This is unsigned int instead of unsigned long.
+ * For details, see fs/inode.c:get_next_ino().
+ */
+unsigned int shmem_next_ino(struct super_block *sb)
+{
+ unsigned long ino;
+ struct shmem_sb_info *sbinfo;
+
+ ino = 0;
+ sbinfo = SHMEM_SB(sb);
+ if (sbinfo->ino_bitmap) {
+ spin_lock(&sbinfo->ino_lock);
+ /*
+ * someone else may remount,
+ * and ino_bitmap might be reset.
+ */
+ if (sbinfo->ino_bitmap
+ && !bitmap_full(sbinfo->ino_bitmap, sbinfo->max_inodes)) {
+ ino = find_first_zero_bit(sbinfo->ino_bitmap,
+ sbinfo->max_inodes);
+ set_bit(ino, sbinfo->ino_bitmap);
+ ino += 2; /* ino 0 and 1 are reserved */
+ }
+ spin_unlock(&sbinfo->ino_lock);
+ }
+
+ /*
+ * someone else did remount,
+ * or ino_bitmap is unused originally,
+ * or ino_bitmap is full.
+ */
+ if (!ino)
+ ino = get_next_ino();
+
+ return ino;
}
/**
@@ -578,7 +625,7 @@ static void shmem_evict_inode(struct inode *inode)
simple_xattrs_free(&info->xattrs);
WARN_ON(inode->i_blocks);
- shmem_free_inode(inode->i_sb);
+ shmem_free_inode(inode);
clear_inode(inode);
}
@@ -1306,7 +1353,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
inode = new_inode(sb);
if (inode) {
- inode->i_ino = get_next_ino();
+ inode->i_ino = shmem_next_ino(sb);
inode_init_owner(inode, dir, mode);
inode->i_blocks = 0;
inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
@@ -1348,7 +1395,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
break;
}
} else
- shmem_free_inode(sb);
+ shmem_free_inode(inode);
return inode;
}
@@ -1945,7 +1992,7 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
struct inode *inode = dentry->d_inode;
if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
- shmem_free_inode(inode->i_sb);
+ shmem_free_inode(inode);
dir->i_size -= BOGO_DIRENT_SIZE;
inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
@@ -2315,6 +2362,54 @@ static const struct export_operations shmem_export_ops = {
.fh_to_dentry = shmem_fh_to_dentry,
};
+static void shmem_ino_bitmap(struct shmem_sb_info *sbinfo,
+ unsigned long prev_max)
+{
+ unsigned long *p;
+ unsigned long n, d;
+ int do_msg;
+
+ n = sbinfo->max_inodes / BITS_PER_BYTE;
+ if (sbinfo->max_inodes % BITS_PER_BYTE)
+ n++;
+
+ do_msg = 0;
+ if (sbinfo->ino_bitmap) {
+ /*
+ * by shrinking the bitmap, the large inode number in use
+ * may be left. but it is harmless.
+ */
+ d = 0;
+ if (sbinfo->max_inodes > prev_max) {
+ d = sbinfo->max_inodes - prev_max;
+ d /= BITS_PER_BYTE;
+ }
+ spin_lock(&sbinfo->ino_lock);
+ p = krealloc(sbinfo->ino_bitmap, n, GFP_NOWAIT);
+ if (p) {
+ memset(p + n - d, 0, d);
+ sbinfo->ino_bitmap = p;
+ spin_unlock(&sbinfo->ino_lock);
+ } else {
+ p = sbinfo->ino_bitmap;
+ sbinfo->ino_bitmap = NULL;
+ spin_unlock(&sbinfo->ino_lock);
+ kfree(p);
+ do_msg = 1;
+ }
+ } else {
+ p = kzalloc(n, GFP_NOFS);
+ spin_lock(&sbinfo->ino_lock);
+ sbinfo->ino_bitmap = p;
+ spin_unlock(&sbinfo->ino_lock);
+ do_msg = !p;
+ }
+
+ if (unlikely(do_msg))
+ pr_err("%s: ino failed (%lu bytes). Ignored.\n",
+ __func__, n);
+}
+
static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
bool remount)
{
@@ -2322,7 +2417,10 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
struct mempolicy *mpol = NULL;
uid_t uid;
gid_t gid;
+ bool do_ino;
+ unsigned long old_val = sbinfo->max_inodes;
+ do_ino = 0;
while (options != NULL) {
this_char = options;
for (;;) {
@@ -2342,6 +2440,14 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
}
if (!*this_char)
continue;
+ if (!strcmp(this_char, "ino")) {
+ do_ino = 1;
+ continue;
+ } else if (!strcmp(this_char, "noino")) {
+ do_ino = 0;
+ continue;
+ }
+
if ((value = strchr(this_char,'=')) != NULL) {
*value++ = 0;
} else {
@@ -2370,7 +2476,7 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
goto bad_val;
} else if (!strcmp(this_char,"nr_inodes")) {
sbinfo->max_inodes = memparse(value, &rest);
- if (*rest)
+ if (*rest || !sbinfo->max_inodes)
goto bad_val;
} else if (!strcmp(this_char,"mode")) {
if (remount)
@@ -2408,6 +2514,16 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
}
}
sbinfo->mpol = mpol;
+
+ if (do_ino)
+ shmem_ino_bitmap(sbinfo, old_val);
+ else if (sbinfo->ino_bitmap) {
+ void *p = sbinfo->ino_bitmap;
+ spin_lock(&sbinfo->ino_lock);
+ sbinfo->ino_bitmap = NULL;
+ spin_unlock(&sbinfo->ino_lock);
+ kfree(p);
+ }
return 0;
bad_val:
@@ -2472,6 +2588,8 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
sbinfo->max_blocks << (PAGE_CACHE_SHIFT - 10));
if (sbinfo->max_inodes != shmem_default_max_inodes())
seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes);
+ if (sbinfo->ino_bitmap)
+ seq_printf(seq, ",ino");
if (sbinfo->mode != (S_IRWXUGO | S_ISVTX))
seq_printf(seq, ",mode=%03ho", sbinfo->mode);
if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID))
@@ -2491,6 +2609,7 @@ static void shmem_put_super(struct super_block *sb)
percpu_counter_destroy(&sbinfo->used_blocks);
mpol_put(sbinfo->mpol);
+ kfree(sbinfo->ino_bitmap);
kfree(sbinfo);
sb->s_fs_info = NULL;
}
@@ -2510,6 +2629,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
sbinfo->mode = S_IRWXUGO | S_ISVTX;
sbinfo->uid = current_fsuid();
sbinfo->gid = current_fsgid();
+ spin_lock_init(&sbinfo->ino_lock);
sb->s_fs_info = sbinfo;
#ifdef CONFIG_TMPFS
next prev parent reply other threads:[~2014-04-30 4:09 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-04-29 15:45 [PATCH] vfs: get_next_ino(), never inum=0 hooanon05g
2014-04-29 17:42 ` J. R. Okajima
2014-04-29 17:53 ` Christoph Hellwig
2014-04-30 4:08 ` J. R. Okajima [this message]
2014-04-30 22:56 ` Andreas Dilger
2014-05-10 3:18 ` J. R. Okajima
2014-08-18 18:21 ` [PATCH v2] " Carlos Maiolino
2014-08-19 0:58 ` J. R. Okajima
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=7467.1398830934@jrobl \
--to=hooanon05g@gmail.com \
--cc=dchinner@redhat.com \
--cc=hch@lst.de \
--cc=linux-fsdevel@vger.kernel.org \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).