From: Marcelo Tosatti <marcelo@kvack.org>
To: linux-mm@kvack.org
Cc: "KOSAKI Motohiro" <kosaki.motohiro@jp.fujitsu.com>,
"Daniel Spång" <daniel.spang@gmail.com>,
"Rik van Riel" <riel@redhat.com>,
"Andrew Morton" <akpm@linux-foundation.org>
Subject: [PATCH] mem notifications v3
Date: Mon, 24 Dec 2007 15:32:50 -0500 [thread overview]
Message-ID: <20071224203250.GA23149@dmt> (raw)
Follows updated version of mem-notify.
This changes the notification point to happen whenever the VM moves an
anonymous page to the inactive list - this is a pretty good indication
that there are unused anonymous pages present which will be very likely
swapped out soon.
Since the notification happens at shrink_zone() which can be called very
often the wakeups are rate limited to 5 times per second (on each CPU).
Index: marcelo/dev/mm/linux-2.6.24-rc2-mm1/Documentation/devices.txt
===================================================================
--- marcelo.orig/dev/mm/linux-2.6.24-rc2-mm1/Documentation/devices.txt
+++ marcelo/dev/mm/linux-2.6.24-rc2-mm1/Documentation/devices.txt
@@ -96,6 +96,7 @@ Your cooperation is appreciated.
11 = /dev/kmsg Writes to this come out as printk's
12 = /dev/oldmem Used by crashdump kernels to access
the memory of the kernel that crashed.
+ 13 = /dev/mem_notify Low memory notification.
1 block RAM disk
0 = /dev/ram0 First RAM disk
Index: marcelo/dev/mm/linux-2.6.24-rc2-mm1/drivers/char/mem.c
===================================================================
--- marcelo.orig/dev/mm/linux-2.6.24-rc2-mm1/drivers/char/mem.c
+++ marcelo/dev/mm/linux-2.6.24-rc2-mm1/drivers/char/mem.c
@@ -34,6 +34,8 @@
# include <linux/efi.h>
#endif
+extern struct file_operations mem_notify_fops;
+
/*
* Architectures vary in how they handle caching for addresses
* outside of main memory.
@@ -854,6 +856,9 @@ static int memory_open(struct inode * in
filp->f_op = &oldmem_fops;
break;
#endif
+ case 13:
+ filp->f_op = &mem_notify_fops;
+ break;
default:
return -ENXIO;
}
@@ -886,6 +891,7 @@ static const struct {
#ifdef CONFIG_CRASH_DUMP
{12,"oldmem", S_IRUSR | S_IWUSR | S_IRGRP, &oldmem_fops},
#endif
+ {13,"mem_notify", S_IRUGO, &mem_notify_fops},
};
static struct class *mem_class;
Index: marcelo/dev/mm/linux-2.6.24-rc2-mm1/include/linux/swap.h
===================================================================
--- marcelo.orig/dev/mm/linux-2.6.24-rc2-mm1/include/linux/swap.h
+++ marcelo/dev/mm/linux-2.6.24-rc2-mm1/include/linux/swap.h
@@ -213,6 +213,9 @@ extern int shmem_unuse(swp_entry_t entry
extern void swap_unplug_io_fn(struct backing_dev_info *, struct page *);
+/* linux/mm/mem_notify.c */
+void mem_notify_userspace(void);
+
#ifdef CONFIG_SWAP
/* linux/mm/page_io.c */
extern int swap_readpage(struct file *, struct page *);
Index: marcelo/dev/mm/linux-2.6.24-rc2-mm1/mm/Makefile
===================================================================
--- marcelo.orig/dev/mm/linux-2.6.24-rc2-mm1/mm/Makefile
+++ marcelo/dev/mm/linux-2.6.24-rc2-mm1/mm/Makefile
@@ -11,7 +11,7 @@ obj-y := bootmem.o filemap.o mempool.o
page_alloc.o page-writeback.o pdflush.o \
readahead.o swap.o truncate.o vmscan.o \
prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
- page_isolation.o $(mmu-y)
+ page_isolation.o mem_notify.o $(mmu-y)
obj-$(CONFIG_PROC_PAGE_MONITOR) += pagewalk.o
obj-$(CONFIG_BOUNCE) += bounce.o
Index: marcelo/dev/mm/linux-2.6.24-rc2-mm1/mm/mem_notify.c
===================================================================
--- /dev/null
+++ marcelo/dev/mm/linux-2.6.24-rc2-mm1/mm/mem_notify.c
@@ -0,0 +1,80 @@
+/*
+ * Notify applications of memory pressure via /dev/mem_notify
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <linux/timer.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/vmstat.h>
+#include <linux/percpu.h>
+#include <linux/timer.h>
+
+static unsigned long mem_notify_status = 0;
+
+static DECLARE_WAIT_QUEUE_HEAD(mem_wait);
+static DEFINE_PER_CPU(unsigned long, last_mem_notify) = INITIAL_JIFFIES;
+
+/* maximum 5 notifications per second per cpu */
+void mem_notify_userspace(void)
+{
+ unsigned long target;
+ unsigned long now = jiffies;
+
+ target = __get_cpu_var(last_mem_notify) + (HZ/5);
+
+ if (time_after(now, target)) {
+ __get_cpu_var(last_mem_notify) = now;
+ mem_notify_status = 1;
+ wake_up(&mem_wait);
+ }
+}
+
+static int mem_notify_open(struct inode *inode, struct file *file)
+{
+ return 0;
+}
+
+static int mem_notify_release(struct inode *inode, struct file *file)
+{
+ return 0;
+}
+
+static unsigned int mem_notify_poll(struct file *file, poll_table *wait)
+{
+ unsigned int val = 0;
+
+ poll_wait(file, &mem_wait, wait);
+
+ if (mem_notify_status) {
+ struct zone *zone;
+ int pages_high, pages_free, pages_reserve;
+
+ mem_notify_status = 0;
+
+ /* check if its not a spurious/stale notification */
+ pages_high = pages_free = pages_reserve = 0;
+ for_each_zone(zone) {
+ if (!populated_zone(zone) || is_highmem(zone))
+ continue;
+ pages_high += zone->pages_high;
+ pages_free += zone_page_state(zone, NR_FREE_PAGES);
+ pages_reserve += zone->lowmem_reserve[MAX_NR_ZONES-1];
+ }
+
+ if (pages_free < (pages_high+pages_reserve)*2)
+ val = POLLIN;
+ }
+
+ return val;
+}
+
+struct file_operations mem_notify_fops = {
+ .open = mem_notify_open,
+ .release = mem_notify_release,
+ .poll = mem_notify_poll,
+};
+EXPORT_SYMBOL(mem_notify_fops);
Index: marcelo/dev/mm/linux-2.6.24-rc2-mm1/mm/vmscan.c
===================================================================
--- marcelo.orig/dev/mm/linux-2.6.24-rc2-mm1/mm/vmscan.c
+++ marcelo/dev/mm/linux-2.6.24-rc2-mm1/mm/vmscan.c
@@ -960,7 +960,7 @@ static inline int zone_is_near_oom(struc
* The downside is that we have to touch page->_count against each page.
* But we had to alter page->flags anyway.
*/
-static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
+static bool shrink_active_list(unsigned long nr_pages, struct zone *zone,
struct scan_control *sc, int priority)
{
unsigned long pgmoved;
@@ -972,6 +972,7 @@ static void shrink_active_list(unsigned
struct page *page;
struct pagevec pvec;
int reclaim_mapped = 0;
+ bool inactivated_anon = 0;
if (sc->may_swap) {
long mapped_ratio;
@@ -1078,6 +1079,13 @@ force_reclaim_mapped:
if (!reclaim_mapped ||
(total_swap_pages == 0 && PageAnon(page)) ||
page_referenced(page, 0, sc->mem_cgroup)) {
+ /* deal with the case where there is no
+ * swap but an anonymous page would be
+ * moved to the inactive list.
+ */
+ if (!total_swap_pages && reclaim_mapped &&
+ PageAnon(page))
+ inactivated_anon = 1;
list_add(&page->lru, &l_active);
continue;
}
@@ -1085,6 +1093,8 @@ force_reclaim_mapped:
list_add(&page->lru, &l_active);
continue;
}
+ if (PageAnon(page))
+ inactivated_anon = 1;
list_add(&page->lru, &l_inactive);
}
@@ -1146,6 +1156,7 @@ force_reclaim_mapped:
spin_unlock_irq(&zone->lru_lock);
pagevec_release(&pvec);
+ return inactivated_anon;
}
/*
@@ -1158,6 +1169,7 @@ static unsigned long shrink_zone(int pri
unsigned long nr_inactive;
unsigned long nr_to_scan;
unsigned long nr_reclaimed = 0;
+ bool inactivated_anon = 0;
/*
* Add one to `nr_to_scan' just to make sure that the kernel will
@@ -1184,7 +1196,8 @@ static unsigned long shrink_zone(int pri
nr_to_scan = min(nr_active,
(unsigned long)sc->swap_cluster_max);
nr_active -= nr_to_scan;
- shrink_active_list(nr_to_scan, zone, sc, priority);
+ if (shrink_active_list(nr_to_scan, zone, sc, priority))
+ inactivated_anon = 1;
}
if (nr_inactive) {
@@ -1196,6 +1209,9 @@ static unsigned long shrink_zone(int pri
}
}
+ if (inactivated_anon)
+ mem_notify_userspace();
+
throttle_vm_writeout(sc->gfp_mask);
return nr_reclaimed;
}
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next reply other threads:[~2007-12-24 20:32 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-12-24 20:32 Marcelo Tosatti [this message]
2007-12-25 3:47 ` [PATCH] mem notifications v3 KOSAKI Motohiro
2007-12-25 4:56 ` [RFC] add poll_wait_exclusive() API KOSAKI Motohiro
2007-12-27 21:05 ` Marcelo Tosatti
2007-12-25 8:31 ` [PATCH] mem notifications v3 KOSAKI Motohiro
2007-12-25 10:31 ` [RFC][patch 1/2] mem notifications v3 improvement for large system KOSAKI Motohiro
2007-12-27 21:04 ` Marcelo Tosatti
2007-12-28 0:38 ` KOSAKI Motohiro
2007-12-25 10:31 ` [RFC][patch 2/2] " KOSAKI Motohiro
2007-12-25 10:41 ` KOSAKI Motohiro
2007-12-27 4:49 ` [RFC][patch] mem_notify more faster reduce load average KOSAKI Motohiro
2007-12-27 20:13 ` [PATCH] mem notifications v3 Marcelo Tosatti
2007-12-28 1:44 ` KOSAKI Motohiro
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20071224203250.GA23149@dmt \
--to=marcelo@kvack.org \
--cc=akpm@linux-foundation.org \
--cc=daniel.spang@gmail.com \
--cc=kosaki.motohiro@jp.fujitsu.com \
--cc=linux-mm@kvack.org \
--cc=riel@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.