From: zwu.kernel@gmail.com
To: linux-fsdevel@vger.kernel.org
Cc: linux-ext4@vger.kernel.org, linux-btrfs@vger.kernel.org,
linux-kernel@vger.kernel.org, linuxram@linux.vnet.ibm.com,
viro@zeniv.linux.org.uk, david@fromorbit.com, dave@jikos.cz,
tytso@mit.edu, cmm@us.ibm.com,
Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
Subject: [RFC v3 04/13] vfs: add function for collecting raw access info
Date: Wed, 10 Oct 2012 18:07:26 +0800 [thread overview]
Message-ID: <1349863655-29320-5-git-send-email-zwu.kernel@gmail.com> (raw)
In-Reply-To: <1349863655-29320-1-git-send-email-zwu.kernel@gmail.com>
From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
Add utility helpers that update the access frequencies of a file
and of the ranges within it.
Signed-off-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
---
fs/hot_tracking.c | 190 ++++++++++++++++++++++++++++++++++++++++++
fs/hot_tracking.h | 12 +++
include/linux/hot_tracking.h | 4 +
3 files changed, 206 insertions(+), 0 deletions(-)
diff --git a/fs/hot_tracking.c b/fs/hot_tracking.c
index 5fd993e..86c87c7 100644
--- a/fs/hot_tracking.c
+++ b/fs/hot_tracking.c
@@ -174,6 +174,196 @@ static void hot_inode_tree_exit(struct hot_info *root)
}
}
+/*
+ * Look up the hot_inode_item keyed by inode number @ino in @root's
+ * hot_inode_tree, allocating and inserting a new item if none exists.
+ *
+ * Returns the item with an extra reference taken on hot_inode.refs, or an
+ * ERR_PTR() on failure (-ENOMEM, or the error reported by the radix tree).
+ * The caller releases the reference with hot_inode_item_put().
+ */
+struct hot_inode_item
+*hot_inode_item_find(struct hot_info *root, u64 ino)
+{
+	struct hot_inode_item *he;
+	int ret;
+
+again:
+	spin_lock(&root->lock);
+	he = radix_tree_lookup(&root->hot_inode_tree, ino);
+	if (he) {
+		kref_get(&he->hot_inode.refs);
+		spin_unlock(&root->lock);
+		return he;
+	}
+	spin_unlock(&root->lock);
+
+	/*
+	 * GFP_NOFS only: GFP_KERNEL already contains __GFP_FS, so
+	 * "GFP_KERNEL | GFP_NOFS" would silently allow filesystem
+	 * recursion during reclaim.
+	 */
+	he = kmem_cache_zalloc(hot_inode_item_cachep, GFP_NOFS);
+	if (!he)
+		return ERR_PTR(-ENOMEM);
+
+	hot_inode_item_init(he, ino, &root->hot_inode_tree);
+
+	ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
+	if (ret) {
+		kmem_cache_free(hot_inode_item_cachep, he);
+		return ERR_PTR(ret);
+	}
+
+	spin_lock(&root->lock);
+	ret = radix_tree_insert(&root->hot_inode_tree, ino, he);
+	/*
+	 * Take the caller's reference before dropping the lock, so the
+	 * item is never visible in the tree without it.
+	 */
+	if (!ret)
+		kref_get(&he->hot_inode.refs);
+	spin_unlock(&root->lock);
+	radix_tree_preload_end();
+
+	if (ret) {
+		/* The item never made it into the tree; nobody else saw it. */
+		kmem_cache_free(hot_inode_item_cachep, he);
+		/* Lost a race with a concurrent insert: use the winner. */
+		if (ret == -EEXIST)
+			goto again;
+		return ERR_PTR(ret);
+	}
+
+	return he;
+}
+
+/*
+ * Look up the hot_range_item keyed by @start in @he's hot_range_tree,
+ * allocating and inserting a new item if none exists.
+ *
+ * Returns the item with an extra reference taken on hot_range.refs, or an
+ * ERR_PTR() on failure (-ENOMEM, or the error reported by the radix tree).
+ * The caller releases the reference with hot_range_item_put().
+ */
+static struct hot_range_item
+*hot_range_item_find(struct hot_inode_item *he,
+			u32 start)
+{
+	struct hot_range_item *hr;
+	int ret;
+
+again:
+	spin_lock(&he->lock);
+	hr = radix_tree_lookup(&he->hot_range_tree, start);
+	if (hr) {
+		kref_get(&hr->hot_range.refs);
+		spin_unlock(&he->lock);
+		return hr;
+	}
+	spin_unlock(&he->lock);
+
+	/*
+	 * GFP_NOFS only: GFP_KERNEL already contains __GFP_FS, so
+	 * "GFP_KERNEL | GFP_NOFS" would silently allow filesystem
+	 * recursion during reclaim.
+	 */
+	hr = kmem_cache_zalloc(hot_range_item_cachep, GFP_NOFS);
+	if (!hr)
+		return ERR_PTR(-ENOMEM);
+
+	hot_range_item_init(hr, start, he);
+
+	ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
+	if (ret) {
+		kmem_cache_free(hot_range_item_cachep, hr);
+		return ERR_PTR(ret);
+	}
+
+	spin_lock(&he->lock);
+	ret = radix_tree_insert(&he->hot_range_tree, start, hr);
+	/*
+	 * Take the caller's reference before dropping the lock, so the
+	 * item is never visible in the tree without it.
+	 */
+	if (!ret)
+		kref_get(&hr->hot_range.refs);
+	spin_unlock(&he->lock);
+	radix_tree_preload_end();
+
+	if (ret) {
+		/* The item never made it into the tree; nobody else saw it. */
+		kmem_cache_free(hot_range_item_cachep, hr);
+		/* Lost a race with a concurrent insert: use the winner. */
+		if (ret == -EEXIST)
+			goto again;
+		return ERR_PTR(ret);
+	}
+
+	return hr;
+}
+
+/*
+ * Fold the time elapsed since the previous access into a running average
+ * of inter-access deltas and return the new average.
+ *
+ * @old_atime: time of the previous access
+ * @cur_time:  time of the current access
+ * @old_avg:   average before this access
+ *
+ * FREQ_POWER (defined in fs/hot_tracking.h) is used twice:
+ *
+ *  - the delta in nanoseconds is right shifted by FREQ_POWER, so the
+ *    average is kept in units of 2^FREQ_POWER ns and the low bits of the
+ *    timestamp are lost; the higher the power, the lower the precision;
+ *
+ *  - the shifted delta is then blended into the old average with a weight
+ *    of 2^-FREQ_POWER:
+ *
+ *	new_avg = (old_avg * (2^FREQ_POWER - 1) + shifted_delta) / 2^FREQ_POWER
+ *
+ *    e.g. with FREQ_POWER == 4 the newest access contributes 1/16th and
+ *    the history 15/16ths.
+ *
+ * This is a kludged-together weighted average, since we can't afford to
+ * keep all of the information needed for a _real_ one.
+ *
+ * The caller should have already locked freq_data's parent's spinlock.
+ */
+static u64 hot_average_update(struct timespec old_atime,
+				struct timespec cur_time, u64 old_avg)
+{
+	struct timespec delta_ts;
+	s64 delta_ns;
+	u64 new_avg;
+	u64 new_delta;
+
+	delta_ts = timespec_sub(cur_time, old_atime);
+	delta_ns = timespec_to_ns(&delta_ts);
+
+	/*
+	 * The delta is signed.  If the clock used by the caller stepped
+	 * backwards, a negative value would be shifted as signed and then
+	 * stored as a huge unsigned number, wrecking the average.  Treat
+	 * such an access as "no time elapsed" instead.
+	 */
+	if (delta_ns < 0)
+		delta_ns = 0;
+	new_delta = (u64)delta_ns >> FREQ_POWER;
+
+	new_avg = (old_avg << FREQ_POWER) - old_avg + new_delta;
+	new_avg = new_avg >> FREQ_POWER;
+
+	return new_avg;
+}
+
+/*
+ * Account one access in @freq_data.  Depending on @write, the read or the
+ * write side is updated: the access counter is bumped, the time since the
+ * previous access of that kind is folded into the running average via
+ * hot_average_update(), and the current time is stored as the last access.
+ */
+static void hot_freq_data_update(struct hot_freq_data *freq_data, bool write)
+{
+	struct timespec now = current_kernel_time();
+
+	if (!write) {
+		freq_data->nr_reads++;
+		freq_data->avg_delta_reads =
+			hot_average_update(freq_data->last_read_time, now,
+					freq_data->avg_delta_reads);
+		freq_data->last_read_time = now;
+		return;
+	}
+
+	freq_data->nr_writes++;
+	freq_data->avg_delta_writes =
+		hot_average_update(freq_data->last_write_time, now,
+				freq_data->avg_delta_writes);
+	freq_data->last_write_time = now;
+}
+
+/*
+ * Main function to update access frequency from read/writepage(s) hooks.
+ *
+ * Updates the frequency data of @inode's hot_inode_item and of every
+ * RANGE_SIZE-aligned range overlapped by [@start, @start + @len).  A
+ * non-zero @rw is accounted as a write, zero as a read.  Nothing is done
+ * for inodes rejected by TRACK_THIS_INODE() or for a zero @len.  Failure
+ * to find or create an item triggers a WARN_ON and stops the update.
+ */
+inline void hot_update_freqs(struct hot_info *root,
+			struct inode *inode, u64 start,
+			u64 len, int rw)
+{
+	struct hot_inode_item *inode_item;
+	struct hot_range_item *range_item;
+	u32 idx, last;
+
+	if (!TRACK_THIS_INODE(inode) || (len == 0))
+		return;
+
+	inode_item = hot_inode_item_find(root, inode->i_ino);
+	if (IS_ERR(inode_item)) {
+		WARN_ON(1);
+		return;
+	}
+
+	/* Per-inode statistics first. */
+	spin_lock(&inode_item->hot_inode.lock);
+	hot_freq_data_update(&inode_item->hot_inode.hot_freq_data, rw);
+	spin_unlock(&inode_item->hot_inode.lock);
+
+	/*
+	 * Then per-range statistics.  Ranges are aligned on RANGE_SIZE
+	 * boundaries to prevent proliferation of range structs.
+	 */
+	last = (start + len + RANGE_SIZE - 1) >> RANGE_BITS;
+	idx = start >> RANGE_BITS;
+	while (idx < last) {
+		range_item = hot_range_item_find(inode_item, idx);
+		if (IS_ERR(range_item)) {
+			WARN_ON(1);
+			break;
+		}
+
+		spin_lock(&range_item->hot_range.lock);
+		hot_freq_data_update(&range_item->hot_range.hot_freq_data, rw);
+		spin_unlock(&range_item->hot_range.lock);
+
+		hot_range_item_put(range_item);
+		idx++;
+	}
+
+	/* Drop the reference taken by hot_inode_item_find(). */
+	hot_inode_item_put(inode_item);
+}
+
/*
* Initialize kmem cache for hot_inode_item and hot_range_item.
*/
diff --git a/fs/hot_tracking.h b/fs/hot_tracking.h
index 4e8aa77..37f69ee 100644
--- a/fs/hot_tracking.h
+++ b/fs/hot_tracking.h
@@ -19,6 +19,18 @@
/* values for hot_freq_data flags */
#define FREQ_DATA_TYPE_INODE (1 << 0)
#define FREQ_DATA_TYPE_RANGE (1 << 1)
+/*
+ * Size of sub-file ranges: hot_update_freqs() shifts its start/len offsets
+ * right by RANGE_BITS to get a range index, so each range spans
+ * RANGE_SIZE (2^20) offset units (presumably bytes, i.e. 1 MiB; confirm
+ * against the callers).
+ */
+#define RANGE_BITS 20
+#define RANGE_SIZE (1 << RANGE_BITS)
+
+#define FREQ_POWER 4	/* shift used by hot_average_update() to scale and weight deltas */
+
+/*
+ * Find, or create, the hot_inode_item for inode number @ino.  Returns the
+ * item with a reference held, or an ERR_PTR() on failure.
+ */
+struct hot_inode_item
+*hot_inode_item_find(struct hot_info *root, u64 ino);
+/* Drop a reference obtained from hot_inode_item_find(). */
+void hot_inode_item_put(struct hot_inode_item *he);
+/*
+ * Update access frequencies for @inode and the ranges covered by
+ * [@start, @start + @len); non-zero @rw means write.  Not marked inline:
+ * the body lives in fs/hot_tracking.c, so an inline prototype without a
+ * body is only a source of "declared but never defined" warnings.
+ */
+void hot_update_freqs(struct hot_info *root,
+			struct inode *inode, u64 start,
+			u64 len, int rw);
void hot_track_init(struct super_block *sb);
void hot_track_exit(struct super_block *sb);
diff --git a/include/linux/hot_tracking.h b/include/linux/hot_tracking.h
index 13aa54b..1e0aed5 100644
--- a/include/linux/hot_tracking.h
+++ b/include/linux/hot_tracking.h
@@ -75,4 +75,8 @@ extern struct hot_info *global_hot_tracking_info;
extern void hot_track_init(struct super_block *sb);
extern void hot_track_exit(struct super_block *sb);
+/*
+ * Update access frequencies for @inode and the ranges covered by
+ * [@start, @start + @len); non-zero @rw means write.  Declared as a plain
+ * extern function like its neighbours: there is no inline body in this
+ * header.
+ */
+extern void hot_update_freqs(struct hot_info *root,
+			struct inode *inode, u64 start,
+			u64 len, int rw);
+
#endif /* _LINUX_HOTTRACK_H */
--
1.7.6.5
next prev parent reply other threads:[~2012-10-10 10:08 UTC|newest]
Thread overview: 55+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-10-10 10:07 [RFC v3 00/13] vfs: hot data tracking zwu.kernel
2012-10-10 10:07 ` [RFC v3 01/13] btrfs: add one new mount option '-o hot_track' zwu.kernel
[not found] ` <5075632c.03cc440a.1b33.7805SMTPIN_ADDED@mx.google.com>
2012-10-10 12:21 ` Zhi Yong Wu
2012-10-10 12:21 ` Zhi Yong Wu
2012-10-10 13:11 ` Lukáš Czerner
2012-10-10 13:16 ` Zhi Yong Wu
2012-10-10 16:28 ` David Sterba
2012-10-11 13:41 ` Zhi Yong Wu
2012-10-11 14:35 ` Zhi Yong Wu
2012-10-11 14:41 ` David Sterba
2012-10-11 14:46 ` Zhi Yong Wu
2012-10-10 10:07 ` [RFC v3 02/13] vfs: introduce private radix tree structures zwu.kernel
2012-10-10 15:34 ` David Sterba
2012-10-11 13:35 ` Zhi Yong Wu
2012-10-10 10:07 ` [RFC v3 03/13] vfs: Initialize and free main data structures zwu.kernel
2012-10-10 10:07 ` zwu.kernel [this message]
2012-10-10 10:07 ` [RFC v3 05/13] vfs: add two map arrays zwu.kernel
2012-10-10 10:07 ` [RFC v3 06/13] vfs: add hooks to enable hot data tracking zwu.kernel
2012-10-10 10:07 ` [RFC v3 07/13] vfs: add function for updating map arrays zwu.kernel
2012-10-10 10:07 ` [RFC v3 08/13] vfs: add aging function for old map info zwu.kernel
2012-10-10 10:07 ` [RFC v3 09/13] vfs: add one wq to update map info periodically zwu.kernel
2012-10-16 0:27 ` Dave Chinner
2012-10-17 6:34 ` Zhi Yong Wu
2012-10-18 2:25 ` Zheng Liu
2012-10-18 2:26 ` Zhi Yong Wu
2012-10-10 10:07 ` [RFC v3 10/13] vfs: register one memory shrinker zwu.kernel
2012-10-10 10:07 ` [RFC v3 11/13] vfs: add 3 new ioctl interfaces zwu.kernel
2012-10-15 7:48 ` Dave Chinner
2012-10-15 7:57 ` Zhi Yong Wu
2012-10-16 3:17 ` Dave Chinner
2012-10-16 4:18 ` Zhi Yong Wu
2012-10-19 8:21 ` Zhi Yong Wu
2012-10-10 10:07 ` [RFC v3 12/13] vfs: add debugfs support zwu.kernel
2012-10-10 16:53 ` David Sterba
2012-10-10 21:05 ` David Sterba
2012-10-15 7:55 ` Dave Chinner
2012-10-15 8:15 ` Zhi Yong Wu
2012-10-15 8:04 ` Dave Chinner
2012-10-15 8:47 ` Zhi Yong Wu
2012-10-10 10:07 ` [RFC v3 13/13] vfs: add documentation zwu.kernel
2012-10-15 0:35 ` Zheng Liu
2012-10-15 7:04 ` Zhi Yong Wu
2012-10-15 0:39 ` [RFC v3 00/13] vfs: hot data tracking Zheng Liu
2012-10-15 7:05 ` Zhi Yong Wu
2012-10-15 20:42 ` Dave Chinner
2012-10-17 8:57 ` Zhi Yong Wu
2012-10-18 4:29 ` Dave Chinner
2012-10-18 4:44 ` Zhi Yong Wu
2012-10-18 5:17 ` Dave Chinner
2012-10-18 5:24 ` Zhi Yong Wu
2012-10-19 8:29 ` Zhi Yong Wu
2012-10-16 0:04 ` [PATCH] xfs: add hot tracking support Dave Chinner
2012-11-07 8:38 ` Zhi Yong Wu
2012-11-08 5:13 ` Dave Chinner
2012-10-16 0:11 ` [RFC v3 00/13] vfs: hot data tracking Dave Chinner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1349863655-29320-5-git-send-email-zwu.kernel@gmail.com \
--to=zwu.kernel@gmail.com \
--cc=cmm@us.ibm.com \
--cc=dave@jikos.cz \
--cc=david@fromorbit.com \
--cc=linux-btrfs@vger.kernel.org \
--cc=linux-ext4@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linuxram@linux.vnet.ibm.com \
--cc=tytso@mit.edu \
--cc=viro@zeniv.linux.org.uk \
--cc=wuzhy@linux.vnet.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.