linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Theodore Ts'o <tytso@mit.edu>
To: linux-fsdevel@vger.kernel.org
Cc: Ext4 Developers List <linux-ext4@vger.kernel.org>,
	Theodore Ts'o <tytso@mit.edu>
Subject: [PATCH, RFC 3/3] ext4: use the O_HOT and O_COLD open flags to influence inode allocation
Date: Thu, 19 Apr 2012 15:20:11 -0400	[thread overview]
Message-ID: <1334863211-19504-4-git-send-email-tytso@mit.edu> (raw)
In-Reply-To: <1334863211-19504-1-git-send-email-tytso@mit.edu>

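Wire up the O_HOT and O_COLD open flags so that, when an inode is
being created, they can influence which part of the disk is used on
rotational storage devices.  The flags are ignored on non-rotational
devices, and O_HOT is honored only for callers that pass the reserved
uid/gid check or have CAP_SYS_RESOURCE.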

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/ext4.h    |    8 +++++++-
 fs/ext4/ialloc.c  |   33 +++++++++++++++++++++++++++------
 fs/ext4/migrate.c |    2 +-
 fs/ext4/namei.c   |   15 +++++++++++----
 4 files changed, 46 insertions(+), 12 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0e01e90..6539c9a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1553,6 +1553,12 @@ struct ext4_dir_entry_2 {
 #define EXT4_MAX_REC_LEN		((1<<16)-1)
 
 /*
+ * Flags for ext4_new_inode()
+ */
+#define EXT4_NEWI_HOT	0x0001
+#define EXT4_NEWI_COLD	0x0002
+
+/*
  * If we ever get support for fs block sizes > page_size, we'll need
  * to remove the #if statements in the next two functions...
  */
@@ -1850,7 +1856,7 @@ extern int ext4fs_dirhash(const char *name, int len, struct
 /* ialloc.c */
 extern struct inode *ext4_new_inode(handle_t *, struct inode *, umode_t,
 				    const struct qstr *qstr, __u32 goal,
-				    uid_t *owner);
+				    uid_t *owner, int flags);
 extern void ext4_free_inode(handle_t *, struct inode *);
 extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
 extern unsigned long ext4_count_free_inodes(struct super_block *);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 409c2ee..3dcc8c8 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -363,7 +363,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
 
 static int find_group_orlov(struct super_block *sb, struct inode *parent,
 			    ext4_group_t *group, umode_t mode,
-			    const struct qstr *qstr)
+			    const struct qstr *qstr, int flags)
 {
 	ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -508,13 +508,20 @@ fallback_retry:
 }
 
 static int find_group_other(struct super_block *sb, struct inode *parent,
-			    ext4_group_t *group, umode_t mode)
+			    ext4_group_t *group, umode_t mode, int flags)
 {
 	ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
 	ext4_group_t i, last, ngroups = ext4_get_groups_count(sb);
 	struct ext4_group_desc *desc;
 	int flex_size = ext4_flex_bg_size(EXT4_SB(sb));
 
+	if ((flags & EXT4_NEWI_HOT) && (ngroups > 3) &&
+	    (parent_group > ngroups / 3))
+		parent_group = 0;
+	if ((flags & EXT4_NEWI_COLD) && (ngroups > 3) &&
+	    (parent_group < (2 * (ngroups / 3))))
+		parent_group = 2 * (ngroups / 3);
+
 	/*
 	 * Try to place the inode is the same flex group as its
 	 * parent.  If we can't find space, use the Orlov algorithm to
@@ -550,7 +557,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
 		*group = parent_group + flex_size;
 		if (*group > ngroups)
 			*group = 0;
-		return find_group_orlov(sb, parent, group, mode, NULL);
+		return find_group_orlov(sb, parent, group, mode, NULL, flags);
 	}
 
 	/*
@@ -614,7 +621,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
  * group to find a free inode.
  */
 struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode,
-			     const struct qstr *qstr, __u32 goal, uid_t *owner)
+			     const struct qstr *qstr, __u32 goal, uid_t *owner,
+			     int flags)
 {
 	struct super_block *sb;
 	struct buffer_head *inode_bitmap_bh = NULL;
@@ -643,6 +651,19 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode,
 	ei = EXT4_I(inode);
 	sbi = EXT4_SB(sb);
 
+	if (blk_queue_nonrot(bdev_get_queue(sb->s_bdev)))
+		flags &= ~(EXT4_NEWI_HOT | EXT4_NEWI_COLD);
+
+	/*
+	 * Only honor the HOT flag if the caller passes the reserved
+	 * uid/gid check or has CAP_SYS_RESOURCE; otherwise drop it.
+	 */
+	if ((flags & EXT4_NEWI_HOT) && 
+	    !(sbi->s_resuid == current_fsuid() ||
+	      ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) ||
+	      capable(CAP_SYS_RESOURCE)))
+		flags &= ~EXT4_NEWI_HOT;
+
 	if (!goal)
 		goal = sbi->s_inode_goal;
 
@@ -654,9 +675,9 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode,
 	}
 
 	if (S_ISDIR(mode))
-		ret2 = find_group_orlov(sb, dir, &group, mode, qstr);
+		ret2 = find_group_orlov(sb, dir, &group, mode, qstr, flags);
 	else
-		ret2 = find_group_other(sb, dir, &group, mode);
+		ret2 = find_group_other(sb, dir, &group, mode, flags);
 
 got_group:
 	EXT4_I(dir)->i_last_alloc_group = group;
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index f39f80f..2b3d65c 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -469,7 +469,7 @@ int ext4_ext_migrate(struct inode *inode)
 	owner[0] = inode->i_uid;
 	owner[1] = inode->i_gid;
 	tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
-				   S_IFREG, NULL, goal, owner);
+				   S_IFREG, NULL, goal, owner, 0);
 	if (IS_ERR(tmp_inode)) {
 		retval = PTR_ERR(tmp_inode);
 		ext4_journal_stop(handle);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 6f48ff8..222a419 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1742,6 +1742,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 	handle_t *handle;
 	struct inode *inode;
 	int err, retries = 0;
+	int flags = 0;
 
 	dquot_initialize(dir);
 
@@ -1755,7 +1756,13 @@ retry:
 	if (IS_DIRSYNC(dir))
 		ext4_handle_sync(handle);
 
-	inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
+	if (op && op->open_flag & O_HOT)
+		flags |= EXT4_NEWI_HOT;
+	if (op && op->open_flag & O_COLD)
+		flags |= EXT4_NEWI_COLD;
+
+	inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0,
+			       NULL, flags);
 	err = PTR_ERR(inode);
 	if (!IS_ERR(inode)) {
 		inode->i_op = &ext4_file_inode_operations;
@@ -1791,7 +1798,7 @@ retry:
 	if (IS_DIRSYNC(dir))
 		ext4_handle_sync(handle);
 
-	inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
+	inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL, 0);
 	err = PTR_ERR(inode);
 	if (!IS_ERR(inode)) {
 		init_special_inode(inode, inode->i_mode, rdev);
@@ -1831,7 +1838,7 @@ retry:
 		ext4_handle_sync(handle);
 
 	inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
-			       &dentry->d_name, 0, NULL);
+			       &dentry->d_name, 0, NULL, 0);
 	err = PTR_ERR(inode);
 	if (IS_ERR(inode))
 		goto out_stop;
@@ -2278,7 +2285,7 @@ retry:
 		ext4_handle_sync(handle);
 
 	inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
-			       &dentry->d_name, 0, NULL);
+			       &dentry->d_name, 0, NULL, 0);
 	err = PTR_ERR(inode);
 	if (IS_ERR(inode))
 		goto out_stop;
-- 
1.7.10.rc3


  parent reply	other threads:[~2012-04-19 19:20 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-04-19 19:20 [PATCH, RFC 0/3] Introduce new O_HOT and O_COLD flags Theodore Ts'o
2012-04-19 19:20 ` [PATCH, RFC 1/3] fs: add new open flags O_HOT and O_COLD Theodore Ts'o
2012-04-19 19:20 ` [PATCH, RFC 2/3] fs: propagate the open_flags structure down to the low-level fs's create() Theodore Ts'o
2012-04-19 19:20 ` Theodore Ts'o [this message]
2012-04-19 19:45   ` [PATCH, RFC 3/3] ext4: use the O_HOT and O_COLD open flags to influence inode allocation Eric Sandeen
2012-04-19 19:59     ` Ted Ts'o
2012-04-19 22:55       ` Andreas Dilger
2012-04-19 23:27   ` Dave Chinner
2012-04-20  2:26     ` Ted Ts'o
2012-04-21  0:57       ` Dave Chinner
2012-04-20  0:26 ` [PATCH, RFC 0/3] Introduce new O_HOT and O_COLD flags Alex Elder
2012-04-20  2:45   ` Ted Ts'o
2012-04-20  9:31     ` Boaz Harrosh
2012-04-20  9:12 ` Boaz Harrosh
2012-04-20  9:45   ` Lukas Czerner
2012-04-20 11:01     ` James Bottomley
2012-04-20 11:23       ` Lukas Czerner
2012-04-20 14:07         ` Christoph Lameter
2012-04-20 14:42         ` James Bottomley
2012-04-20 14:58           ` Ted Ts'o
2012-04-21 23:56             ` KOSAKI Motohiro
2012-04-22  6:30               ` Nick Piggin
2012-04-23  8:23                 ` James Bottomley
2012-04-23 11:47                   ` Nick Piggin
2012-04-24  6:18                     ` Nick Piggin
2012-04-24 15:00                       ` KOSAKI Motohiro
2012-04-21 18:26       ` Jeff Garzik
2012-04-20 10:16 ` Bernd Schubert
2012-04-20 10:38   ` Lukas Czerner
2012-04-21 18:24 ` Jeff Garzik
2012-04-24 16:07 ` Alex Elder
2012-04-24 19:33 ` Jamie Lokier

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1334863211-19504-4-git-send-email-tytso@mit.edu \
    --to=tytso@mit.edu \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).