* [PATCH] zoned and jiffies based vm
@ 2000-06-22 20:06 Roger Larsson
0 siblings, 0 replies; only message in thread
From: Roger Larsson @ 2000-06-22 20:06 UTC (permalink / raw)
To: linux-mm@kvack.org
[-- Attachment #1: Type: text/plain, Size: 798 bytes --]
This is an attempt to solve the performance and CPU usage
problems in the vm subsystem.
During the development of this patch I have found some interesting
things:
* Most read pages will end up as Referenced even if the PG_referenced
bit is cleared when they are inserted into the LRU - this is probably
due to the pages being read ahead, and thus referenced later...
Improvements/bugs in patch:
* does not handle age wrap of really old pages.
* does not use the lru_reused counter.
* could use another counter (incremented at use) or
mechanism instead (function that ages all pages at once).
* I am not sure how it will handle mmap002, forgot to run it before
connecting to internet...
I am sending it as is since I will be away this weekend...
/RogerL
--
Home page:
http://www.norran.net/nra02596/
[-- Attachment #2: patch-2.4.0-test2-pre6-roger.jiffiesvm --]
[-- Type: text/plain, Size: 11521 bytes --]
diff -aur linux/include/linux/mm.h roger/include/linux/mm.h
--- linux/include/linux/mm.h Fri May 12 21:16:14 2000
+++ roger/include/linux/mm.h Thu Jun 22 21:35:33 2000
@@ -148,6 +148,8 @@
atomic_t count;
unsigned long flags; /* atomic flags, some possibly updated asynchronously */
struct list_head lru;
+ unsigned long lru_born; /* at jiffies */
+ long lru_reused; /* no of times reused */
wait_queue_head_t wait;
struct page **pprev_hash;
struct buffer_head * buffers;
@@ -196,7 +198,8 @@
#define SetPageError(page) set_bit(PG_error, &(page)->flags)
#define ClearPageError(page) clear_bit(PG_error, &(page)->flags)
#define PageReferenced(page) test_bit(PG_referenced, &(page)->flags)
-#define SetPageReferenced(page) set_bit(PG_referenced, &(page)->flags)
+#define SetPageReferenced(page) do { (page)->lru_born = jiffies; set_bit(PG_referenced, &(page)->flags); } while (0) /* also restarts the page's LRU age; do/while keeps it safe in unbraced if/else */
+#define ClearPageReferenced(page) clear_bit(PG_referenced, &(page)->flags)
#define PageTestandClearReferenced(page) test_and_clear_bit(PG_referenced, &(page)->flags)
#define PageDecrAfter(page) test_bit(PG_decr_after, &(page)->flags)
#define SetPageDecrAfter(page) set_bit(PG_decr_after, &(page)->flags)
diff -aur linux/include/linux/mmzone.h roger/include/linux/mmzone.h
--- linux/include/linux/mmzone.h Fri May 12 21:16:13 2000
+++ roger/include/linux/mmzone.h Thu Jun 22 21:35:33 2000
@@ -32,6 +32,9 @@
char zone_wake_kswapd;
unsigned long pages_min, pages_low, pages_high;
+ int nr_lru_pages;
+ struct list_head lru_cache;
+
/*
* free areas of different sizes
*/
diff -aur linux/include/linux/swap.h roger/include/linux/swap.h
--- linux/include/linux/swap.h Fri May 12 21:16:13 2000
+++ roger/include/linux/swap.h Thu Jun 22 21:35:33 2000
@@ -67,7 +67,6 @@
FASTCALL(unsigned int nr_free_pages(void));
FASTCALL(unsigned int nr_free_buffer_pages(void));
FASTCALL(unsigned int nr_free_highpages(void));
-extern int nr_lru_pages;
extern atomic_t nr_async_pages;
extern struct address_space swapper_space;
extern atomic_t page_cache_size;
@@ -165,16 +164,19 @@
*/
#define lru_cache_add(page) \
do { \
+ zone_t *zone = (page)->zone; \
spin_lock(&pagemap_lru_lock); \
- list_add(&(page)->lru, &lru_cache); \
- nr_lru_pages++; \
+ list_add(&(page)->lru, &zone->lru_cache); \
+ (page)->lru_born = jiffies; /* age is measured from insertion */ \
+ (page)->lru_reused = 1; \
+ zone->nr_lru_pages++; \
spin_unlock(&pagemap_lru_lock); \
} while (0)
#define __lru_cache_del(page) \
do { \
list_del(&(page)->lru); \
- nr_lru_pages--; \
+ (page)->zone->nr_lru_pages--; \
} while (0)
#define lru_cache_del(page) \
diff -aur linux/mm/filemap.c roger/mm/filemap.c
--- linux/mm/filemap.c Wed May 31 20:13:36 2000
+++ roger/mm/filemap.c Thu Jun 22 21:35:05 2000
@@ -44,7 +44,7 @@
atomic_t page_cache_size = ATOMIC_INIT(0);
unsigned int page_hash_bits;
struct page **page_hash_table;
-struct list_head lru_cache;
+long lru_pensionable_age = 60*HZ;
static spinlock_t pagecache_lock = SPIN_LOCK_UNLOCKED;
/*
@@ -249,25 +249,109 @@
* before doing sync writes. We can only do sync writes if we can
* wait for IO (__GFP_IO set).
*/
+int shrink_zone_mmap(zone_t *zone, int priority, int gfp_mask, int *recomend);
+/* Run shrink_zone_mmap() on every zone with zone_wake_kswapd set; returns the summed results and retunes lru_pensionable_age. */
int shrink_mmap(int priority, int gfp_mask)
{
+ int ret = 0, modify_pensionable_age = 1;
+
+ /*
+ * alternative way to walk the zones... from page_alloc.c
+ *
+ * for (i = 0; i < NUMNODES; i++)
+ * for (zone = NODE_DATA(i)->node_zones;
+ * zone < NODE_DATA(i)->node_zones + MAX_NR_ZONES;
+ * zone++)
+ */
+
+ pg_data_t *pgdat = pgdat_list;
+
+ do {
+ int i;
+ for(i = 0; i < MAX_NR_ZONES; i++) {
+ zone_t *zone = pgdat->node_zones+ i;
+
+ /*
+ * only scan zones flagged zone_wake_kswapd; keep the lowest recomend seen
+ */
+ if (zone->zone_wake_kswapd) {
+ int recomend;
+ ret += shrink_zone_mmap(zone, priority, gfp_mask, &recomend);
+
+ if (recomend < modify_pensionable_age) {
+ modify_pensionable_age = recomend;
+ }
+ }
+
+ }
+ pgdat = pgdat->node_next;
+ } while (pgdat);
+
+ /* all flagged zones scanned: <0 halves the age, >0 (also the initial value when no zone was scanned) adds HZ, 0 keeps it */
+ if (modify_pensionable_age < 0) {
+ lru_pensionable_age /= 2;
+ }
+ else if (modify_pensionable_age > 0) {
+ lru_pensionable_age += HZ;
+ }
+
+ return ret;
+}
+
+int shrink_zone_mmap(zone_t *zone, int priority, int gfp_mask, int *recomend)
+{
int ret = 0, count, nr_dirty;
+ long page_age = 0;
+ int pages_scanned = 0;
struct list_head * page_lru;
struct page * page = NULL;
+
+ /* debug */
+ int pages_referenced = 0;
+ long page_age_sum = 0;
+ long page_age_min = +24*60*60*HZ;
+ long page_age_max = -24*60*60*HZ;
- count = nr_lru_pages / (priority + 1);
+ count = zone->nr_lru_pages / (priority + 1);
nr_dirty = priority;
/* we need pagemap_lru_lock for list_del() ... subtle code below */
spin_lock(&pagemap_lru_lock);
- while (count > 0 && (page_lru = lru_cache.prev) != &lru_cache) {
+ while (count > 0 && zone->zone_wake_kswapd &&
+ (page_lru = zone->lru_cache.prev) != &zone->lru_cache) {
+
page = list_entry(page_lru, struct page, lru);
list_del(page_lru);
- if (PageTestandClearReferenced(page))
- goto dispose_continue;
+ /* debug, lru_born is set when marked as referenced */
+ if (PageTestandClearReferenced(page)) {
+ page->lru_reused++;
+ pages_referenced++;
+ }
+
+ page_age = (long)(jiffies - page->lru_born);
+ pages_scanned++;
+
+ /* debug vars */
+ if (page_age < page_age_min) page_age_min = page_age;
+ if (page_age > page_age_max) page_age_max = page_age;
+ page_age_sum += page_age;
+
+ if (pages_scanned > zone->nr_lru_pages) {
+
+ list_add(page_lru, &zone->lru_cache); /* goto dispose_continue */
+ /* all pages scanned without result, indicate to caller */
+ *recomend = -1;
+
+ page_age = -1;
+ goto out;
+ }
+
+ if (page_age < lru_pensionable_age)
+ goto dispose_continue;
count--;
+
/*
* Avoid unscalable SMP locking for pages we can
* immediate tell are untouchable..
@@ -327,13 +411,6 @@
goto made_inode_progress;
}
- /*
- * Page is from a zone we don't care about.
- * Don't drop page cache entries in vain.
- */
- if (page->zone->free_pages > page->zone->pages_high)
- goto cache_unlock_continue;
-
/* is it a page-cache page? */
if (page->mapping) {
if (!PageDirty(page) && !pgcache_under_min()) {
@@ -345,6 +422,20 @@
}
printk(KERN_ERR "shrink_mmap: unknown LRU page!\n");
+ goto cache_unlock_continue;
+
+
+made_inode_progress:
+ page_cache_release(page);
+made_buffer_progress:
+ UnlockPage(page);
+ page_cache_release(page);
+ ret++;
+ spin_lock(&pagemap_lru_lock);
+ /* nr_lru_pages needs the spinlock */
+ zone->nr_lru_pages--;
+
+ continue;
cache_unlock_continue:
spin_unlock(&pagecache_lock);
@@ -353,22 +444,22 @@
UnlockPage(page);
page_cache_release(page);
dispose_continue:
- list_add(page_lru, &lru_cache);
+ list_add(page_lru, &zone->lru_cache);
}
- goto out;
-made_inode_progress:
- page_cache_release(page);
-made_buffer_progress:
- UnlockPage(page);
- page_cache_release(page);
- ret = 1;
- spin_lock(&pagemap_lru_lock);
- /* nr_lru_pages needs the spinlock */
- nr_lru_pages--;
+ if (zone->zone_wake_kswapd)
+ *recomend = 0;
+ else
+ *recomend = +1;
out:
spin_unlock(&pagemap_lru_lock);
+ /* debug trace; the average is guarded because the scan loop may not have run at all (pages_scanned == 0) */
+ printk(KERN_DEBUG "lru %s %3d(%3d) %5ld>%5ld [%5ld %5ld %5ld]\n",
+ zone->name,
+ ret, pages_scanned,
+ page_age, lru_pensionable_age,
+ page_age_min, pages_scanned ? page_age_sum / pages_scanned : 0L, page_age_max);
return ret;
}
diff -aur linux/mm/page_alloc.c roger/mm/page_alloc.c
--- linux/mm/page_alloc.c Fri May 12 20:21:20 2000
+++ roger/mm/page_alloc.c Thu Jun 22 21:35:05 2000
@@ -25,7 +25,6 @@
#endif
int nr_swap_pages;
-int nr_lru_pages;
pg_data_t *pgdat_list;
static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
@@ -313,6 +312,22 @@
}
/*
+ * Total number of pages on the LRU lists, summed over every zone of every node:
+ */
+unsigned int nr_lru_pages (void)
+{
+ unsigned int sum;
+ zone_t *zone;
+ int i;
+
+ sum = 0;
+ for (i = 0; i < NUMNODES; i++)
+ for (zone = NODE_DATA(i)->node_zones; zone < NODE_DATA(i)->node_zones + MAX_NR_ZONES; zone++)
+ sum += zone->nr_lru_pages;
+ return sum;
+}
+
+/*
* Amount of free RAM allocatable as buffer memory:
*/
unsigned int nr_free_buffer_pages (void)
@@ -321,10 +336,10 @@
zone_t *zone;
int i;
- sum = nr_lru_pages;
+ sum = 0;
for (i = 0; i < NUMNODES; i++)
for (zone = NODE_DATA(i)->node_zones; zone <= NODE_DATA(i)->node_zones+ZONE_NORMAL; zone++)
- sum += zone->free_pages;
+ sum += zone->free_pages + zone->nr_lru_pages;
return sum;
}
@@ -356,7 +371,7 @@
printk("( Free: %d, lru_cache: %d (%d %d %d) )\n",
nr_free_pages(),
- nr_lru_pages,
+ nr_lru_pages(),
freepages.min,
freepages.low,
freepages.high);
@@ -497,7 +512,6 @@
freepages.min += i;
freepages.low += i * 2;
freepages.high += i * 3;
- memlist_init(&lru_cache);
/*
* Some architectures (with lots of mem and discontinous memory
@@ -543,6 +557,10 @@
zone->lock = SPIN_LOCK_UNLOCKED;
zone->zone_pgdat = pgdat;
zone->free_pages = 0;
+
+ zone->nr_lru_pages = 0;
+ memlist_init(&zone->lru_cache);
+
if (!size)
continue;
diff -aur linux/mm/vmscan.c roger/mm/vmscan.c
--- linux/mm/vmscan.c Wed May 31 20:13:37 2000
+++ roger/mm/vmscan.c Thu Jun 22 21:35:05 2000
@@ -436,14 +436,16 @@
int priority;
int count = FREE_COUNT;
int swap_count;
+ int progress;
/* Always trim SLAB caches when memory gets low. */
kmem_cache_reap(gfp_mask);
priority = 64;
do {
- while (shrink_mmap(priority, gfp_mask)) {
- if (!--count)
+ while ((progress = shrink_mmap(priority, gfp_mask)) > 0) {
+ count -= progress;
+ if (count <= 0)
goto done;
}
@@ -480,8 +482,9 @@
} while (--priority >= 0);
/* Always end on a shrink_mmap.. */
- while (shrink_mmap(0, gfp_mask)) {
- if (!--count)
+ while ((progress = shrink_mmap(0, gfp_mask)) > 0) {
+ count -= progress;
+ if (count <= 0)
goto done;
}
/* We return 1 if we are freed some page */
@@ -491,6 +494,27 @@
return 1;
}
+/* Return 1 if any zone with a non-zero size has fewer free pages than its pages_low mark, else 0. */
+static int memory_pressure(void)
+{
+ pg_data_t *pgdat;
+
+ pgdat = pgdat_list;
+ do {
+ int i;
+ for(i = 0; i < MAX_NR_ZONES; i++) {
+ zone_t *zone = pgdat->node_zones+ i;
+ if (zone->size &&
+ zone->free_pages < zone->pages_low) {
+ return 1;
+ }
+ }
+ pgdat = pgdat->node_next;
+ } while (pgdat);
+
+ return 0;
+}
+
DECLARE_WAIT_QUEUE_HEAD(kswapd_wait);
/*
@@ -530,29 +554,16 @@
tsk->flags |= PF_MEMALLOC;
for (;;) {
- pg_data_t *pgdat;
- int something_to_do = 0;
+ if (memory_pressure()) {
+ do_try_to_free_pages(GFP_KSWAPD);
- pgdat = pgdat_list;
- do {
- int i;
- for(i = 0; i < MAX_NR_ZONES; i++) {
- zone_t *zone = pgdat->node_zones+ i;
- if (tsk->need_resched)
- schedule();
- if (!zone->size || !zone->zone_wake_kswapd)
- continue;
- if (zone->free_pages < zone->pages_low)
- something_to_do = 1;
- do_try_to_free_pages(GFP_KSWAPD);
- }
- pgdat = pgdat->node_next;
- } while (pgdat);
-
- if (!something_to_do) {
- tsk->state = TASK_INTERRUPTIBLE;
- interruptible_sleep_on(&kswapd_wait);
- }
+ if (tsk->need_resched)
+ schedule();
+ }
+ else {
+ tsk->state = TASK_INTERRUPTIBLE;
+ interruptible_sleep_on(&kswapd_wait);
+ }
}
}
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2000-06-22 20:09 UTC | newest]
Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2000-06-22 20:06 [PATCH] zoned and jiffies based vm Roger Larsson
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.