From: zwu.kernel@gmail.com
To: viro@zeniv.linux.org.uk
Cc: torvalds@linux-foundation.org, linux-fsdevel@vger.kernel.org,
	Zhi Yong Wu, Chandra Seetharaman
Subject: [RESEND PATCH v4 04/10] VFS hot tracking: Add shrinker functionality to curtail memory usage
Date: Mon, 12 Aug 2013 10:20:18 +0800
Message-ID: <1376274024-28689-5-git-send-email-zwu.kernel@gmail.com>
In-Reply-To: <1376274024-28689-1-git-send-email-zwu.kernel@gmail.com>
References: <1376274024-28689-1-git-send-email-zwu.kernel@gmail.com>

From: Zhi Yong Wu

Register a shrinker to control the amount of memory that is used in
tracking hot regions.  If we are throwing inodes out of memory due to
memory pressure, we most definitely are going to need to reduce the
amount of memory the tracking code is using, even if it means losing
useful information.
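[Note: the sketch below is an illustration only and is not part of the
patch.  It shows the legacy shrinker interface this series targets: a
single ->shrink() callback that reports the object count when
sc->nr_to_scan is zero, refuses work when __GFP_FS is not set, and
otherwise frees up to nr_to_scan objects.  The demo_* names are
hypothetical stand-ins for the patch's hot_cnt counter and eviction
path.]

/*
 * Illustration only -- not part of the patch.  Minimal, self-contained
 * example of the legacy (single-callback) shrinker interface.
 */
#include <linux/atomic.h>
#include <linux/gfp.h>
#include <linux/shrinker.h>

static atomic_long_t demo_cnt = ATOMIC_LONG_INIT(0);	/* tracked objects */

static void demo_evict(unsigned long nr)
{
	/* Drop up to nr cached objects, decrementing demo_cnt as they go. */
}

static int demo_shrink(struct shrinker *shrink, struct shrink_control *sc)
{
	if (sc->nr_to_scan == 0)
		/* Query pass: report how many objects could be freed. */
		return atomic_long_read(&demo_cnt);

	if (!(sc->gfp_mask & __GFP_FS))
		/* Cannot recurse into filesystem code from this context. */
		return -1;

	/* Scan pass: actually reclaim, then report what is left. */
	demo_evict(sc->nr_to_scan);
	return atomic_long_read(&demo_cnt);
}

static struct shrinker demo_shrinker = {
	.shrink	= demo_shrink,
	.seeks	= DEFAULT_SEEKS,
};

/*
 * register_shrinker(&demo_shrinker) at setup and
 * unregister_shrinker(&demo_shrinker) at teardown mirror what
 * hot_tree_init() and hot_tree_exit() do in the patch below.
 */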
Signed-off-by: Chandra Seetharaman
Signed-off-by: Zhi Yong Wu
---
 fs/hot_tracking.c            | 74 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/hot_tracking.h |  2 ++
 2 files changed, 76 insertions(+)

diff --git a/fs/hot_tracking.c b/fs/hot_tracking.c
index 7a6271c..b7f5767 100644
--- a/fs/hot_tracking.c
+++ b/fs/hot_tracking.c
@@ -29,13 +29,16 @@ static void hot_range_item_init(struct hot_range_item *hr,
 	hr->start = start;
 	hr->len = hot_bit_shift(1, RANGE_BITS, true);
 	hr->hot_inode = he;
+	atomic_long_inc(&he->hot_root->hot_cnt);
 }
 
 static void hot_range_item_free_cb(struct rcu_head *head)
 {
 	struct hot_range_item *hr = container_of(head,
 				struct hot_range_item, rcu);
+	struct hot_info *root = hr->hot_inode->hot_root;
 
+	atomic_long_dec(&root->hot_cnt);
 	kmem_cache_free(hot_range_item_cachep, hr);
 }
 
@@ -219,13 +222,16 @@ static void hot_inode_item_init(struct hot_inode_item *he,
 	he->i_ino = ino;
 	he->hot_root = root;
 	spin_lock_init(&he->i_lock);
+	atomic_long_inc(&root->hot_cnt);
 }
 
 static void hot_inode_item_free_cb(struct rcu_head *head)
 {
 	struct hot_inode_item *he = container_of(head,
 				struct hot_inode_item, rcu);
+	struct hot_info *root = he->hot_root;
 
+	atomic_long_dec(&root->hot_cnt);
 	kmem_cache_free(hot_inode_item_cachep, he);
 }
 
@@ -504,6 +510,39 @@ u32 hot_temp_calc(struct hot_freq *freq)
 	return result;
 }
 
+static void hot_item_evict(struct hot_info *root, unsigned long work,
+			unsigned long (*work_get)(struct hot_info *root))
+{
+	int i;
+
+	if (work <= 0)
+		return;
+
+	for (i = 0; i < MAP_SIZE; i++) {
+		struct hot_inode_item *he, *next;
+		unsigned long work_prev;
+
+		spin_lock(&root->t_lock);
+		if (list_empty(&root->hot_map[TYPE_INODE][i])) {
+			spin_unlock(&root->t_lock);
+			continue;
+		}
+
+		list_for_each_entry_safe(he, next,
+			&root->hot_map[TYPE_INODE][i], track_list) {
+			work_prev = work_get(root);
+			hot_inode_item_put(he);
+			work -= (work_prev - work_get(root));
+			if (work <= 0)
+				break;
+		}
+		spin_unlock(&root->t_lock);
+
+		if (work <= 0)
+			break;
+	}
+}
+
 /*
  * Every sync period we update temperatures for
  * each hot inode item and hot range item for aging
@@ -566,6 +605,34 @@ void __init hot_cache_init(void)
 }
 EXPORT_SYMBOL_GPL(hot_cache_init);
 
+static inline unsigned long hot_cnt_get(struct hot_info *root)
+{
+	return (unsigned long)atomic_long_read(&root->hot_cnt);
+}
+
+static void hot_prune_map(struct hot_info *root, unsigned long nr)
+{
+	hot_item_evict(root, nr, hot_cnt_get);
+}
+
+/* The shrinker callback function */
+static int hot_track_prune(struct shrinker *shrink,
+			struct shrink_control *sc)
+{
+	struct hot_info *root =
+		container_of(shrink, struct hot_info, hot_shrink);
+
+	if (sc->nr_to_scan == 0)
+		return atomic_long_read(&root->hot_cnt) / 2;
+
+	if (!(sc->gfp_mask & __GFP_FS))
+		return -1;
+
+	hot_prune_map(root, sc->nr_to_scan);
+
+	return atomic_long_read(&root->hot_cnt);
+}
+
 /*
  * Main function to update i/o access frequencies, and it will be called
  * from read/writepages() hooks, which are read_pages(), do_writepages(),
@@ -633,6 +700,7 @@ static struct hot_info *hot_tree_init(struct super_block *sb)
 	root->hot_inode_tree = RB_ROOT;
 	spin_lock_init(&root->t_lock);
 	spin_lock_init(&root->m_lock);
+	atomic_long_set(&root->hot_cnt, 0);
 
 	for (i = 0; i < MAP_SIZE; i++) {
 		for (j = 0; j < MAX_TYPES; j++)
@@ -653,6 +721,11 @@ static struct hot_info *hot_tree_init(struct super_block *sb)
 	queue_delayed_work(root->update_wq, &root->update_work,
 		msecs_to_jiffies(HOT_UPDATE_INTERVAL * MSEC_PER_SEC));
 
+	/* Register a shrinker callback */
+	root->hot_shrink.shrink = hot_track_prune;
+	root->hot_shrink.seeks = DEFAULT_SEEKS;
+	register_shrinker(&root->hot_shrink);
+
 	return root;
 }
 
@@ -663,6 +736,7 @@ static void hot_tree_exit(struct hot_info *root)
 {
 	struct rb_node *node;
 
+	unregister_shrinker(&root->hot_shrink);
 	cancel_delayed_work_sync(&root->update_work);
 	destroy_workqueue(root->update_wq);
 
diff --git a/include/linux/hot_tracking.h b/include/linux/hot_tracking.h
index f4cad4b..d446dba 100644
--- a/include/linux/hot_tracking.h
+++ b/include/linux/hot_tracking.h
@@ -82,8 +82,10 @@ struct hot_info {
 	struct list_head hot_map[MAX_TYPES][MAP_SIZE]; /* map of inode temp */
 	spinlock_t t_lock; /* protect tree and map for inode item */
 	spinlock_t m_lock; /* protect map for range item */
+	atomic_long_t hot_cnt;
 	struct workqueue_struct *update_wq;
 	struct delayed_work update_work;
+	struct shrinker hot_shrink;
 };
 
 extern void __init hot_cache_init(void);
-- 
1.7.11.7