From: Theodore Ts'o <tytso@mit.edu>
To: linux-fsdevel@vger.kernel.org
Cc: Ext4 Developers List <linux-ext4@vger.kernel.org>,
Theodore Ts'o <tytso@mit.edu>
Subject: [PATCH, RFC 3/3] ext4: use the O_HOT and O_COLD open flags to influence inode allocation
Date: Thu, 19 Apr 2012 15:20:11 -0400 [thread overview]
Message-ID: <1334863211-19504-4-git-send-email-tytso@mit.edu> (raw)
In-Reply-To: <1334863211-19504-1-git-send-email-tytso@mit.edu>
Wire up the O_HOT and O_COLD open flags so that, when an inode is
being created, the flags can influence which part of the disk gets
used on rotational storage devices.
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
fs/ext4/ext4.h | 8 +++++++-
fs/ext4/ialloc.c | 33 +++++++++++++++++++++++++++------
fs/ext4/migrate.c | 2 +-
fs/ext4/namei.c | 15 +++++++++++----
4 files changed, 46 insertions(+), 12 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0e01e90..6539c9a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1553,6 +1553,12 @@ struct ext4_dir_entry_2 {
#define EXT4_MAX_REC_LEN ((1<<16)-1)
/*
+ * Flags for ext4_new_inode()
+ */
+#define EXT4_NEWI_HOT 0x0001
+#define EXT4_NEWI_COLD 0x0002
+
+/*
* If we ever get support for fs block sizes > page_size, we'll need
* to remove the #if statements in the next two functions...
*/
@@ -1850,7 +1856,7 @@ extern int ext4fs_dirhash(const char *name, int len, struct
/* ialloc.c */
extern struct inode *ext4_new_inode(handle_t *, struct inode *, umode_t,
const struct qstr *qstr, __u32 goal,
- uid_t *owner);
+ uid_t *owner, int flags);
extern void ext4_free_inode(handle_t *, struct inode *);
extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
extern unsigned long ext4_count_free_inodes(struct super_block *);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 409c2ee..3dcc8c8 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -363,7 +363,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
static int find_group_orlov(struct super_block *sb, struct inode *parent,
ext4_group_t *group, umode_t mode,
- const struct qstr *qstr)
+ const struct qstr *qstr, int flags)
{
ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -508,13 +508,20 @@ fallback_retry:
}
static int find_group_other(struct super_block *sb, struct inode *parent,
- ext4_group_t *group, umode_t mode)
+ ext4_group_t *group, umode_t mode, int flags)
{
ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
ext4_group_t i, last, ngroups = ext4_get_groups_count(sb);
struct ext4_group_desc *desc;
int flex_size = ext4_flex_bg_size(EXT4_SB(sb));
+ if ((flags & EXT4_NEWI_HOT) && (ngroups > 3) &&
+ (parent_group > ngroups / 3))
+ parent_group = 0;
+ if ((flags & EXT4_NEWI_COLD) && (ngroups > 3) &&
+ (parent_group < (2 * (ngroups / 3))))
+ parent_group = 2 * (ngroups / 3);
+
/*
* Try to place the inode is the same flex group as its
* parent. If we can't find space, use the Orlov algorithm to
@@ -550,7 +557,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
*group = parent_group + flex_size;
if (*group > ngroups)
*group = 0;
- return find_group_orlov(sb, parent, group, mode, NULL);
+ return find_group_orlov(sb, parent, group, mode, NULL, flags);
}
/*
@@ -614,7 +621,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
* group to find a free inode.
*/
struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode,
- const struct qstr *qstr, __u32 goal, uid_t *owner)
+ const struct qstr *qstr, __u32 goal, uid_t *owner,
+ int flags)
{
struct super_block *sb;
struct buffer_head *inode_bitmap_bh = NULL;
@@ -643,6 +651,19 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode,
ei = EXT4_I(inode);
sbi = EXT4_SB(sb);
+ if (blk_queue_nonrot(bdev_get_queue(sb->s_bdev)))
+ flags &= ~(EXT4_NEWI_HOT | EXT4_NEWI_COLD);
+
+ /*
+ * We will only allow the HOT flag if the user passes the
+ * reserved uid/gid check, or if she has CAP_SYS_RESOURCE
+ */
+ if ((flags & EXT4_NEWI_HOT) &&
+ !(sbi->s_resuid == current_fsuid() ||
+ ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) ||
+ capable(CAP_SYS_RESOURCE)))
+ flags &= ~EXT4_NEWI_HOT;
+
if (!goal)
goal = sbi->s_inode_goal;
@@ -654,9 +675,9 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode,
}
if (S_ISDIR(mode))
- ret2 = find_group_orlov(sb, dir, &group, mode, qstr);
+ ret2 = find_group_orlov(sb, dir, &group, mode, qstr, flags);
else
- ret2 = find_group_other(sb, dir, &group, mode);
+ ret2 = find_group_other(sb, dir, &group, mode, flags);
got_group:
EXT4_I(dir)->i_last_alloc_group = group;
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index f39f80f..2b3d65c 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -469,7 +469,7 @@ int ext4_ext_migrate(struct inode *inode)
owner[0] = inode->i_uid;
owner[1] = inode->i_gid;
tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
- S_IFREG, NULL, goal, owner);
+ S_IFREG, NULL, goal, owner, 0);
if (IS_ERR(tmp_inode)) {
retval = PTR_ERR(tmp_inode);
ext4_journal_stop(handle);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 6f48ff8..222a419 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1742,6 +1742,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
handle_t *handle;
struct inode *inode;
int err, retries = 0;
+ int flags = 0;
dquot_initialize(dir);
@@ -1755,7 +1756,13 @@ retry:
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);
- inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
+ if (op && op->open_flag & O_HOT)
+ flags |= EXT4_NEWI_HOT;
+ if (op && op->open_flag & O_COLD)
+ flags |= EXT4_NEWI_COLD;
+
+ inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0,
+ NULL, flags);
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
inode->i_op = &ext4_file_inode_operations;
@@ -1791,7 +1798,7 @@ retry:
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);
- inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
+ inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL, 0);
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
init_special_inode(inode, inode->i_mode, rdev);
@@ -1831,7 +1838,7 @@ retry:
ext4_handle_sync(handle);
inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
- &dentry->d_name, 0, NULL);
+ &dentry->d_name, 0, NULL, 0);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_stop;
@@ -2278,7 +2285,7 @@ retry:
ext4_handle_sync(handle);
inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
- &dentry->d_name, 0, NULL);
+ &dentry->d_name, 0, NULL, 0);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_stop;
--
1.7.10.rc3
next prev parent reply other threads:[~2012-04-19 19:20 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-04-19 19:20 [PATCH, RFC 0/3] Introduce new O_HOT and O_COLD flags Theodore Ts'o
2012-04-19 19:20 ` [PATCH, RFC 1/3] fs: add new open flags O_HOT and O_COLD Theodore Ts'o
2012-04-19 19:20 ` [PATCH, RFC 2/3] fs: propagate the open_flags structure down to the low-level fs's create() Theodore Ts'o
2012-04-19 19:20 ` Theodore Ts'o [this message]
2012-04-19 19:45 ` [PATCH, RFC 3/3] ext4: use the O_HOT and O_COLD open flags to influence inode allocation Eric Sandeen
2012-04-19 19:59 ` Ted Ts'o
2012-04-19 22:55 ` Andreas Dilger
2012-04-19 23:27 ` Dave Chinner
2012-04-20 2:26 ` Ted Ts'o
2012-04-21 0:57 ` Dave Chinner
2012-04-20 0:26 ` [PATCH, RFC 0/3] Introduce new O_HOT and O_COLD flags Alex Elder
2012-04-20 2:45 ` Ted Ts'o
2012-04-20 9:31 ` Boaz Harrosh
2012-04-20 9:12 ` Boaz Harrosh
2012-04-20 9:45 ` Lukas Czerner
2012-04-20 11:01 ` James Bottomley
2012-04-20 11:23 ` Lukas Czerner
2012-04-20 14:07 ` Christoph Lameter
2012-04-20 14:42 ` James Bottomley
2012-04-20 14:58 ` Ted Ts'o
2012-04-21 23:56 ` KOSAKI Motohiro
2012-04-22 6:30 ` Nick Piggin
2012-04-23 8:23 ` James Bottomley
2012-04-23 11:47 ` Nick Piggin
2012-04-24 6:18 ` Nick Piggin
2012-04-24 15:00 ` KOSAKI Motohiro
2012-04-21 18:26 ` Jeff Garzik
2012-04-20 10:16 ` Bernd Schubert
2012-04-20 10:38 ` Lukas Czerner
2012-04-21 18:24 ` Jeff Garzik
2012-04-24 16:07 ` Alex Elder
2012-04-24 19:33 ` Jamie Lokier
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1334863211-19504-4-git-send-email-tytso@mit.edu \
--to=tytso@mit.edu \
--cc=linux-ext4@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).