All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: linux-mm@kvack.org
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [PATCH 27/39] mm: clockpro: nonresident page tracking for CLOCK-Pro
Date: Wed, 12 Jul 2006 16:42:18 +0200	[thread overview]
Message-ID: <20060712144218.16998.32605.sendpatchset@lappy> (raw)
In-Reply-To: <20060712143659.16998.6444.sendpatchset@lappy>

From: Rik van Riel <riel@redhat.com>

Track non-resident pages through a simple hashing scheme.  This way
the space overhead is limited to 1 u32 per page, or 0.1% space overhead
and lookups are one cache miss.

Aside from seeing whether or not a page was recently evicted, we can
also take a reasonable guess at how many other pages were evicted since
this page was evicted.

NOTE: bucket space also contributes to the total size of the hash.
This way even 64-bit machines with more than 2^32 pages get a fair
chance.

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Marcelo Tosatti <marcelo.tosatti@cyclades.com>

 include/linux/nonresident.h |   12 +++
 mm/nonresident.c            |  175 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 187 insertions(+)

Index: linux-2.6/mm/nonresident.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6/mm/nonresident.c	2006-07-12 16:11:22.000000000 +0200
@@ -0,0 +1,175 @@
+/*
+ * mm/nonresident.c
+ * (C) 2004,2005 Red Hat, Inc
+ * Written by Rik van Riel <riel@redhat.com>
+ * Released under the GPL, see the file COPYING for details.
+ *
+ * Keeps track of whether a non-resident page was recently evicted
+ * and should be immediately promoted to the active list. This also
+ * helps automatically tune the inactive target.
+ *
+ * The pageout code stores a recently evicted page in this cache
+ * by calling nonresident_put(mapping, index) and can look it up
+ * again by calling nonresident_get() with the same arguments; a
+ * successful lookup also removes the entry from the cache.
+ *
+ * Note that there is no way to invalidate pages after eg. truncate
+ * or exit, we let the pages fall out of the non-resident set through
+ * normal replacement.
+ */
+#include <linux/mm.h>
+#include <linux/cache.h>
+#include <linux/spinlock.h>
+#include <linux/bootmem.h>
+#include <linux/hash.h>
+#include <linux/prefetch.h>
+#include <linux/kernel.h>
+
+/* Number of non-resident pages per hash bucket. Never smaller than 15. */
+#if (L1_CACHE_BYTES < 64)
+#define NR_BUCKET_BYTES 64
+#else
+#define NR_BUCKET_BYTES L1_CACHE_BYTES
+#endif
+#define NUM_NR ((NR_BUCKET_BYTES - sizeof(atomic_t))/sizeof(u32))
+
+struct nr_bucket
+{
+	atomic_t hand;		/* last slot index claimed by nonresident_put() */
+	u32 page[NUM_NR];	/* nr_cookie() tags; zeroed by nonresident_get() on a hit */
+} ____cacheline_aligned;
+
+/* The non-resident page hash table. */
+static struct nr_bucket * nonres_table;
+static unsigned int nonres_shift;
+static unsigned int nonres_mask;
+
+/* Pick the hash bucket that tracks the page at (mapping, index). */
+static struct nr_bucket * nr_hash(void * mapping, unsigned long index)
+{
+	unsigned long key = hash_ptr(mapping, BITS_PER_LONG);
+
+	/* Mix the index in, then mask the result down to a bucket number. */
+	key = 37 * key + hash_long(index, BITS_PER_LONG);
+	key &= nonres_mask;
+
+	return &nonres_table[key];
+}
+
+/* Compute the 32-bit tag stored in a bucket slot for (mapping, index). */
+static u32 nr_cookie(struct address_space * mapping, unsigned long index)
+{
+	unsigned long tag;
+
+	/*
+	 * Mixed differently from the bucket selection to ensure the
+	 * combined bits extend the hash space.
+	 */
+	tag = 51 * hash_long(index, BITS_PER_LONG);
+	tag += hash_ptr(mapping, BITS_PER_LONG);
+	if (mapping && mapping->host)
+		tag = 37 * tag + hash_long(mapping->host->i_ino, BITS_PER_LONG);
+	return (u32)(tag >> (BITS_PER_LONG - 32));
+}
+
+/*
+ * Look up (and forget) a recently evicted page.  Returns an estimate of how
+ * many pages were evicted after it, or ~0UL if the page is not in the table.
+ */
+unsigned long nonresident_get(struct address_space * mapping, unsigned long index)
+{
+	struct nr_bucket * nr_bucket;
+	unsigned long distance;	/* wide enough for NUM_NR << nonres_shift */
+	u32 wanted;
+	int i;
+
+	prefetch(mapping->host);
+	nr_bucket = nr_hash(mapping, index);
+	prefetch(nr_bucket);
+	wanted = nr_cookie(mapping, index);
+
+	for (i = 0; i < NUM_NR; i++) {
+		if (nr_bucket->page[i] == wanted) {
+			nr_bucket->page[i] = 0;
+			/* Return the distance between entry and clock hand. */
+			distance = atomic_read(&nr_bucket->hand) + NUM_NR - i;
+			distance = (distance % NUM_NR) << nonres_shift;
+			/* Add some jitter to the lower nonres_shift bits. */
+			distance += (nr_bucket - nonres_table);
+			return distance;
+		}
+	}
+	return ~0UL;
+}
+
+u32 nonresident_put(struct address_space * mapping, unsigned long index)
+{
+	struct nr_bucket * nr_bucket;
+	u32 nrpage;
+	int i;
+	/* Record (mapping, index) as evicted; returns the slot's old contents. */
+	prefetch(mapping->host);
+	nr_bucket = nr_hash(mapping, index);
+
+	prefetchw(nr_bucket);
+	nrpage = nr_cookie(mapping, index);
+
+	/* Atomically claim the next slot by advancing the bucket's hand. */
+	preempt_disable();
+retry:
+	i = atomic_inc_return(&nr_bucket->hand);
+	if (unlikely(i >= NUM_NR)) {
+		if (i == NUM_NR)	/* the caller landing exactly on NUM_NR rewinds */
+			atomic_set(&nr_bucket->hand, -1);	/* next increment yields 0 */
+		goto retry;
+	}
+	preempt_enable();
+
+	/* Statistics may want to know whether the entry was in use. */
+	return xchg(&nr_bucket->page[i], nrpage);
+}
+
+unsigned long fastcall nonresident_total(void)
+{
+	return NUM_NR * (1UL << nonres_shift);	/* slots per bucket * buckets */
+}
+
+/*
+ * For interactive workloads, we remember about as many non-resident pages
+ * as we have actual memory pages.  For server workloads with large inter-
+ * reference distances we could benefit from remembering more.
+ */
+static __initdata unsigned long nonresident_factor = 1;
+/*
+ * Boot-time setup: allocate the non-resident hash table and reset every
+ * bucket's clock hand.
+ */
+void __init nonresident_init(void)
+{
+	unsigned long target;
+	unsigned long i;
+
+	/*
+	 * Entries wanted, divided by sizeof(struct nr_bucket) / sizeof(u32)
+	 * per bucket.  Kept in unsigned long so large counts don't truncate.
+	 */
+	target = nr_all_pages * nonresident_factor;
+	target /= (sizeof(struct nr_bucket) / sizeof(u32));
+
+	nonres_table = alloc_large_system_hash("Non-resident page tracking",
+					sizeof(struct nr_bucket), target, 0,
+					HASH_EARLY | HASH_HIGHMEM,
+					&nonres_shift, &nonres_mask, 0);
+
+	for (i = 0; i < (1UL << nonres_shift); i++)
+		atomic_set(&nonres_table[i].hand, 0);
+}
+
+static int __init set_nonresident_factor(char * str)
+{
+	int given = (str != 0);	/* 0 when no option string was passed in */
+	if (given)
+		nonresident_factor = simple_strtoul(str, &str, 0);
+	return given;
+}
+__setup("nonresident_factor=", set_nonresident_factor);
Index: linux-2.6/include/linux/nonresident.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6/include/linux/nonresident.h	2006-07-12 16:11:22.000000000 +0200
@@ -0,0 +1,12 @@
+#ifndef _LINUX_NONRESIDENT_H_
+#define _LINUX_NONRESIDENT_H_
+
+#ifdef __KERNEL__
+
+extern void nonresident_init(void);
+extern unsigned long nonresident_get(struct address_space *, unsigned long);
+extern u32 nonresident_put(struct address_space *, unsigned long);
+extern unsigned long fastcall nonresident_total(void);
+
+#endif /* __KERNEL__ */
+#endif /* _LINUX_NONRESIDENT_H_ */

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2006-07-12 14:42 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-07-12 14:36 [PATCH 0/39] mm: 2.6.17-pr1 - generic page-replacement framework and 4 new policies Peter Zijlstra
2006-07-12 14:37 ` [PATCH 1/39] mm: disuse activate_page() Peter Zijlstra
2006-07-12 14:37 ` [PATCH 2/39] mm: adjust blk_congestion_wait() logic Peter Zijlstra
2006-07-12 14:37 ` [PATCH 3/39] mm: pgrep: prepare for page replace framework Peter Zijlstra
2006-07-12 14:37 ` [PATCH 4/39] mm: pgrep: convert insertion Peter Zijlstra
2006-07-12 14:37 ` [PATCH 5/39] mm: pgrep: add a use-once insertion hint Peter Zijlstra
2006-07-12 14:38 ` [PATCH 6/39] mm: pgrep: generice __pagevec_*_add Peter Zijlstra
2006-07-12 14:38 ` [PATCH 7/39] mm: pgrep: abstract the activation logic Peter Zijlstra
2006-07-12 14:38 ` [PATCH 8/39] mm: pgrep: move useful macros around Peter Zijlstra
2006-07-12 14:38 ` [PATCH 9/39] mm: pgrep: move struct scan_control around Peter Zijlstra
2006-07-12 14:38 ` [PATCH 10/39] mm: pgrep: isolate the reclaim_mapped logic Peter Zijlstra
2006-07-12 14:39 ` [PATCH 11/39] mm: pgrep: replace mark_page_accessed Peter Zijlstra
2006-07-12 14:39 ` [PATCH 12/39] mm: pgrep: move the shrink logic Peter Zijlstra
2006-07-12 14:39 ` [PATCH 13/39] mm: pgrep: abstract rotate_reclaimable_page() Peter Zijlstra
2006-07-12 14:39 ` [PATCH 14/39] mm: pgrep: manage page-state Peter Zijlstra
2006-07-12 14:39 ` [PATCH 15/39] mm: pgrep: abstract page removal Peter Zijlstra
2006-07-12 14:40 ` [PATCH 16/39] mm: pgrep: remove mm_inline.h Peter Zijlstra
2006-07-12 14:40 ` [PATCH 17/39] mm: pgrep: re-insertion logic Peter Zijlstra
2006-07-12 14:40 ` [PATCH 18/39] mm: pgrep: initialisation hooks Peter Zijlstra
2006-07-12 14:40 ` [PATCH 19/39] mm: pgrep: info functions Peter Zijlstra
2006-07-12 14:40 ` [PATCH 20/39] mm: pgrep: page count functions Peter Zijlstra
2006-07-12 14:41 ` [PATCH 21/39] mm: pgrep: per policy data Peter Zijlstra
2006-07-12 14:41 ` [PATCH 22/39] mm: pgrep: per policy PG_flags Peter Zijlstra
2006-07-12 14:41 ` [PATCH 23/39] mm: pgrep: nonresident page tracking hooks Peter Zijlstra
2006-07-12 14:41 ` [PATCH 24/39] mm: pgrep: generic shrinker logic Peter Zijlstra
2006-07-12 14:41 ` [PATCH 25/39] mm: pgrep: documentation Peter Zijlstra
2006-07-12 14:42 ` [PATCH 26/39] sum_cpu_var Peter Zijlstra
2006-07-12 14:42 ` Peter Zijlstra [this message]
2006-07-12 14:42 ` [PATCH 28/39] mm: clockpro: re-introduce page_referenced() ignore_token Peter Zijlstra
2006-07-12 14:42 ` [PATCH 29/39] mm: clockpro: second per policy PG_flag Peter Zijlstra
2006-07-12 14:42 ` [PATCH 30/39] mm: clockpro: CLOCK-Pro policy implementation Peter Zijlstra
2006-07-12 14:43 ` [PATCH 31/39] mm: cart: nonresident page tracking for CART Peter Zijlstra
2006-07-12 14:43 ` [PATCH 32/39] mm: cart: third per policy PG_flag Peter Zijlstra
2006-07-12 14:43 ` [PATCH 33/39] mm: cart: CART policy implementation Peter Zijlstra
2006-07-12 14:43 ` [PATCH 34/39] mm: cart: CART-r " Peter Zijlstra
2006-07-12 14:43 ` [PATCH 35/39] mm: random: random page replacement policy Peter Zijlstra
2006-07-12 14:44 ` [PATCH 36/39] mm: refault histogram for non-resident policies Peter Zijlstra
2006-07-12 14:44 ` [PATCH 37/39] mm: use-once: cleanup of the use-once logic Peter Zijlstra
2006-07-12 14:44 ` [PATCH 38/39] mm: use-once: use the generic shrinker logic Peter Zijlstra
2006-07-12 14:44 ` [PATCH 39/39] mm: use-once: cleanup of the insertion logic Peter Zijlstra
2006-07-13 15:38 ` [PATCH 0/39] mm: 2.6.17-pr1 - generic page-replacement framework and 4 new policies Christoph Lameter
2006-07-15 17:03   ` Peter Zijlstra
2006-07-16  3:50     ` Christoph Lameter
2006-07-26 10:03       ` Marcelo Tosatti

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20060712144218.16998.32605.sendpatchset@lappy \
    --to=a.p.zijlstra@chello.nl \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.