* [PATCH] RSS limit enforcement for 2.6
@ 2004-03-15 23:21 Rik van Riel
2004-03-16 6:08 ` Nick Piggin
0 siblings, 1 reply; 6+ messages in thread
From: Rik van Riel @ 2004-03-15 23:21 UTC (permalink / raw)
To: Andrew Morton
Cc: linux-mm, linux-kernel, Hugh Dickins, Pavel Machek, Nick Piggin
Hi,
Hugh Dickins found a bug in the 2.4-rmap RSS limit enforcing
code that may well explain why the previous port of the code
to 2.6 resulted in bad performance. The split active lists
in 2.4-rmap probably masked the largest damages, but in 2.6
it was very much visible.
The patch below should work. Pavel, Nick, still interested
in testing the performance ? ;)
===== fs/exec.c 1.105 vs edited =====
--- 1.105/fs/exec.c Wed Feb 25 05:34:47 2004
+++ edited/fs/exec.c Mon Mar 15 17:27:06 2004
@@ -1119,6 +1119,11 @@
if (retval < 0)
goto out_mm;
+ if (likely(current->mm))
+ bprm.mm->rlimit_rss = current->mm->rlimit_rss;
+ else
+ bprm.mm->rlimit_rss = init_mm.rlimit_rss;
+
bprm.argc = count(argv, bprm.p / sizeof(void *));
if ((retval = bprm.argc) < 0)
goto out_mm;
===== include/linux/init_task.h 1.29 vs edited =====
--- 1.29/include/linux/init_task.h Wed Feb 18 22:42:38 2004
+++ edited/include/linux/init_task.h Mon Mar 15 17:27:57 2004
@@ -2,6 +2,7 @@
#define _LINUX__INIT_TASK_H
#include <linux/file.h>
+#include <asm/resource.h>
#define INIT_FILES \
{ \
@@ -42,6 +43,7 @@
.mmlist = LIST_HEAD_INIT(name.mmlist), \
.cpu_vm_mask = CPU_MASK_ALL, \
.default_kioctx = INIT_KIOCTX(name.default_kioctx, name), \
+ .rlimit_rss = RLIM_INFINITY, \
}
#define INIT_SIGNALS(sig) { \
===== include/linux/sched.h 1.185 vs edited =====
--- 1.185/include/linux/sched.h Sun Mar 7 02:05:01 2004
+++ edited/include/linux/sched.h Mon Mar 15 17:28:38 2004
@@ -205,6 +205,7 @@
unsigned long arg_start, arg_end, env_start, env_end;
unsigned long rss, total_vm, locked_vm;
unsigned long def_flags;
+ unsigned long rlimit_rss;
unsigned long saved_auxv[40]; /* for /proc/PID/auxv */
===== include/linux/swap.h 1.80 vs edited =====
--- 1.80/include/linux/swap.h Mon Jan 19 01:28:35 2004
+++ edited/include/linux/swap.h Mon Mar 15 17:29:00 2004
@@ -179,7 +179,7 @@
/* linux/mm/rmap.c */
#ifdef CONFIG_MMU
-int FASTCALL(page_referenced(struct page *));
+int FASTCALL(page_referenced(struct page *, int *));
struct pte_chain *FASTCALL(page_add_rmap(struct page *, pte_t *,
struct pte_chain *));
void FASTCALL(page_remove_rmap(struct page *, pte_t *));
@@ -188,7 +188,7 @@
/* linux/mm/shmem.c */
extern int shmem_unuse(swp_entry_t entry, struct page *page);
#else
-#define page_referenced(page) TestClearPageReferenced(page)
+#define page_referenced(page, _x) TestClearPageReferenced(page)
#define try_to_unmap(page) SWAP_FAIL
#endif /* CONFIG_MMU */
===== kernel/sys.c 1.73 vs edited =====
--- 1.73/kernel/sys.c Mon Feb 23 14:46:54 2004
+++ edited/kernel/sys.c Mon Mar 15 17:30:13 2004
@@ -1489,6 +1489,14 @@
if (retval)
return retval;
+ /* The rlimit is specified in bytes, convert to pages for mm. */
+ if (resource == RLIMIT_RSS && current->mm) {
+ unsigned long pages = RLIM_INFINITY;
+ if (new_rlim.rlim_cur != RLIM_INFINITY)
+ pages = new_rlim.rlim_cur >> PAGE_SHIFT;
+ current->mm->rlimit_rss = pages;
+ }
+
*old_rlim = new_rlim;
return 0;
}
===== mm/rmap.c 1.36 vs edited =====
--- 1.36/mm/rmap.c Sun Mar 7 02:04:57 2004
+++ edited/mm/rmap.c Mon Mar 15 17:30:45 2004
@@ -104,6 +104,7 @@
/**
* page_referenced - test if the page was referenced
* @page: the page to test
+ * @rsslimit: set if the process(es) using the page is(are) over RSS limit
*
* Quick test_and_clear_referenced for all mappings to a page,
* returns the number of processes which referenced the page.
@@ -112,10 +113,11 @@
* If the page has a single-entry pte_chain, collapse that back to a PageDirect
* representation. This way, it's only done under memory pressure.
*/
-int fastcall page_referenced(struct page * page)
+int fastcall page_referenced(struct page * page, int * rsslimit)
{
struct pte_chain *pc;
- int referenced = 0;
+ int referenced = 0, over_rsslimit = 0;
+ struct mm_struct * mm;
if (page_test_and_clear_young(page))
mark_page_accessed(page);
@@ -125,11 +127,15 @@
if (PageDirect(page)) {
pte_t *pte = rmap_ptep_map(page->pte.direct);
+ mm = ptep_to_mm(pte);
if (ptep_test_and_clear_young(pte))
referenced++;
+ if (mm->rss > mm->rlimit_rss)
+ over_rsslimit = 1;
rmap_ptep_unmap(pte);
- } else {
+ } else if (page->pte.chain) {
int nr_chains = 0;
+ int over_rsslimit = 1;
/* Check all the page tables mapping this page. */
for (pc = page->pte.chain; pc; pc = pte_chain_next(pc)) {
@@ -142,6 +148,9 @@
p = rmap_ptep_map(pte_paddr);
if (ptep_test_and_clear_young(p))
referenced++;
+ mm = ptep_to_mm(p);
+ if (mm->rss <= mm->rlimit_rss)
+ over_rsslimit = 0;
rmap_ptep_unmap(p);
nr_chains++;
}
@@ -154,6 +163,8 @@
__pte_chain_free(pc);
}
}
+ *rsslimit = over_rsslimit;
+
return referenced;
}
===== mm/vmscan.c 1.198 vs edited =====
--- 1.198/mm/vmscan.c Fri Mar 12 04:33:10 2004
+++ edited/mm/vmscan.c Mon Mar 15 18:07:32 2004
@@ -250,6 +250,7 @@
LIST_HEAD(ret_pages);
struct pagevec freed_pvec;
int pgactivate = 0;
+ int over_rsslimit;
int ret = 0;
cond_resched();
@@ -276,8 +277,8 @@
goto keep_locked;
pte_chain_lock(page);
- referenced = page_referenced(page);
- if (referenced && page_mapping_inuse(page)) {
+ referenced = page_referenced(page, &over_rsslimit);
+ if (referenced && page_mapping_inuse(page) && !over_rsslimit) {
/* In active use or really unfreeable. Activate it. */
pte_chain_unlock(page);
goto activate_locked;
@@ -593,6 +594,7 @@
long mapped_ratio;
long distress;
long swap_tendency;
+ int over_rsslimit;
lru_add_drain();
pgmoved = 0;
@@ -657,7 +659,7 @@
continue;
}
pte_chain_lock(page);
- if (page_referenced(page)) {
+ if (page_referenced(page, &over_rsslimit) && !over_rsslimit) {
pte_chain_unlock(page);
list_add(&page->lru, &l_active);
continue;
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] RSS limit enforcement for 2.6
2004-03-15 23:21 [PATCH] RSS limit enforcement for 2.6 Rik van Riel
@ 2004-03-16 6:08 ` Nick Piggin
2004-03-18 22:04 ` Pavel Machek
0 siblings, 1 reply; 6+ messages in thread
From: Nick Piggin @ 2004-03-16 6:08 UTC (permalink / raw)
To: Rik van Riel
Cc: Andrew Morton, linux-mm, linux-kernel, Hugh Dickins, Pavel Machek
Rik van Riel wrote:
>Hi,
>
>Hugh Dickins found a bug in the 2.4-rmap RSS limit enforcing
>code that may well explain why the previous port of the code
>to 2.6 resulted in bad performance. The split active lists
>in 2.4-rmap probably masked the largest damages, but in 2.6
>it was very much visible.
>
>
Hi Rik,
What was the problem by the way?
>The patch below should work. Pavel, Nick, still interested
>in testing the performance ? ;)
>
I could do that.
>@@ -593,6 +594,7 @@
> long mapped_ratio;
> long distress;
> long swap_tendency;
>+ int over_rsslimit;
>
> lru_add_drain();
> pgmoved = 0;
>@@ -657,7 +659,7 @@
> continue;
> }
> pte_chain_lock(page);
>- if (page_referenced(page)) {
>+ if (page_referenced(page, &over_rsslimit) && !over_rsslimit) {
> pte_chain_unlock(page);
> list_add(&page->lru, &l_active);
> continue;
>
This still has a problem that !reclaim_mapped scans will not
shrink a runaway process before putting a lot of pressure on
the rest of the pagecache.
You could do a page_gather_pte_info type thing that doesn't
actually clear all the referenced bits (would probably
SetPageReferenced). Unfortunately this has the downside that
you also need to walk the pte chains for all mapped pages even
in the !reclaim_mapped case.
But it is a good start. We advertise the functionality, so we
should be trying to do something with rss limits.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] RSS limit enforcement for 2.6
2004-03-16 6:08 ` Nick Piggin
@ 2004-03-18 22:04 ` Pavel Machek
2004-03-25 14:44 ` Rik van Riel
0 siblings, 1 reply; 6+ messages in thread
From: Pavel Machek @ 2004-03-18 22:04 UTC (permalink / raw)
To: Nick Piggin
Cc: Rik van Riel, Andrew Morton, linux-mm, linux-kernel, Hugh Dickins,
Pavel Machek
Hi!
> >Hugh Dickins found a bug in the 2.4-rmap RSS limit enforcing
> >code that may well explain why the previous port of the code
> >to 2.6 resulted in bad performance. The split active lists
> >in 2.4-rmap probably masked the largest damages, but in 2.6
> >it was very much visible.
> >
> >
>
> Hi Rik,
> What was the problem by the way?
When running linguistics computation, machine got completely
unusable due to bad memory pressure. nice -n 19 was
useless. Memory limit should help.
--
64 bytes from 195.113.31.123: icmp_seq=28 ttl=51 time=448769.1 ms
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] RSS limit enforcement for 2.6
2004-03-18 22:04 ` Pavel Machek
@ 2004-03-25 14:44 ` Rik van Riel
2004-03-25 22:23 ` Pavel Machek
0 siblings, 1 reply; 6+ messages in thread
From: Rik van Riel @ 2004-03-25 14:44 UTC (permalink / raw)
To: Pavel Machek
Cc: Nick Piggin, Andrew Morton, linux-mm, linux-kernel, Hugh Dickins
On Thu, 18 Mar 2004, Pavel Machek wrote:
> When running linguistics computation, machine got completely
> unusable due to bad memory pressure. nice -n 19 was
> useless. Memory limit should help.
Is this with the new patch, with the old patch or
without any RSS limiting patch ?
--
"Debugging is twice as hard as writing the code in the first place.
Therefore, if you write the code as cleverly as possible, you are,
by definition, not smart enough to debug it." - Brian W. Kernighan
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] RSS limit enforcement for 2.6
2004-03-25 14:44 ` Rik van Riel
@ 2004-03-25 22:23 ` Pavel Machek
0 siblings, 0 replies; 6+ messages in thread
From: Pavel Machek @ 2004-03-25 22:23 UTC (permalink / raw)
To: Rik van Riel
Cc: Nick Piggin, Andrew Morton, linux-mm, linux-kernel, Hugh Dickins
Hi!
> > When running linguistics computation, machine got completely
> > unusable due to bad memory pressure. nice -n 19 was
> > useless. Memory limit should help.
>
> Is this with the new patch, with the old patch or
> without any RSS limiting patch ?
That was without any RSS limiting patch. I'm sorry, I have no time for
linguistics just now.
Pavel
--
When do you have a heart between your knees?
[Johanka's followup: and *two* hearts?]
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH] RSS limit enforcement for 2.6
@ 2004-01-27 17:51 Rik van Riel
0 siblings, 0 replies; 6+ messages in thread
From: Rik van Riel @ 2004-01-27 17:51 UTC (permalink / raw)
To: Andrew Morton; +Cc: Pavel Machek, Linus Torvalds, linux-mm, linux-kernel
Hi Andrew, Linus,
the patch below (softly) enforces RLIMIT_RSS in the 2.6 kernel,
it has been tested by Pavel and seems to work ok for his workload.
Please place it in -mm for more extensive testing.
thanks,
Rik
===== include/linux/init_task.h 1.27 vs edited =====
--- 1.27/include/linux/init_task.h Mon Aug 18 22:46:23 2003
+++ edited/include/linux/init_task.h Tue Jan 20 17:34:40 2004
@@ -2,6 +2,7 @@
#define _LINUX__INIT_TASK_H
#include <linux/file.h>
+#include <asm/resource.h>
#define INIT_FILES \
{ \
@@ -41,6 +42,7 @@
.page_table_lock = SPIN_LOCK_UNLOCKED, \
.mmlist = LIST_HEAD_INIT(name.mmlist), \
.default_kioctx = INIT_KIOCTX(name.default_kioctx, name), \
+ .rlimit_rss = RLIM_INFINITY \
}
#define INIT_SIGNALS(sig) { \
===== include/linux/sched.h 1.178 vs edited =====
--- 1.178/include/linux/sched.h Mon Jan 19 18:38:15 2004
+++ edited/include/linux/sched.h Tue Jan 20 17:32:56 2004
@@ -204,6 +204,7 @@
unsigned long arg_start, arg_end, env_start, env_end;
unsigned long rss, total_vm, locked_vm;
unsigned long def_flags;
+ unsigned long rlimit_rss;
cpumask_t cpu_vm_mask;
unsigned long saved_auxv[40]; /* for /proc/PID/auxv */
===== include/linux/swap.h 1.80 vs edited =====
--- 1.80/include/linux/swap.h Mon Jan 19 01:28:35 2004
+++ edited/include/linux/swap.h Tue Jan 20 18:16:28 2004
@@ -179,7 +179,7 @@
/* linux/mm/rmap.c */
#ifdef CONFIG_MMU
-int FASTCALL(page_referenced(struct page *));
+int FASTCALL(page_referenced(struct page *, int *));
struct pte_chain *FASTCALL(page_add_rmap(struct page *, pte_t *,
struct pte_chain *));
void FASTCALL(page_remove_rmap(struct page *, pte_t *));
@@ -188,7 +188,7 @@
/* linux/mm/shmem.c */
extern int shmem_unuse(swp_entry_t entry, struct page *page);
#else
-#define page_referenced(page) TestClearPageReferenced(page)
+#define page_referenced(page, _x) TestClearPageReferenced(page)
#define try_to_unmap(page) SWAP_FAIL
#endif /* CONFIG_MMU */
===== kernel/sys.c 1.69 vs edited =====
--- 1.69/kernel/sys.c Mon Jan 19 18:38:13 2004
+++ edited/kernel/sys.c Tue Jan 20 18:02:19 2004
@@ -1308,6 +1308,14 @@
if (retval)
return retval;
+ /* The rlimit is specified in bytes, convert to pages for mm. */
+ if (resource == RLIMIT_RSS && current->mm) {
+ unsigned long pages = RLIM_INFINITY;
+ if (new_rlim.rlim_cur != RLIM_INFINITY)
+ pages = new_rlim.rlim_cur >> PAGE_SHIFT;
+ current->mm->rlimit_rss = pages;
+ }
+
*old_rlim = new_rlim;
return 0;
}
===== mm/rmap.c 1.34 vs edited =====
--- 1.34/mm/rmap.c Mon Jan 19 01:36:00 2004
+++ edited/mm/rmap.c Tue Jan 20 18:26:03 2004
@@ -104,6 +104,7 @@
/**
* page_referenced - test if the page was referenced
* @page: the page to test
+ * rsslimit: set if the process(es) using the page is(are) over RSS limit
*
* Quick test_and_clear_referenced for all mappings to a page,
* returns the number of processes which referenced the page.
@@ -112,8 +113,9 @@
* If the page has a single-entry pte_chain, collapse that back to a PageDirect
* representation. This way, it's only done under memory pressure.
*/
-int page_referenced(struct page * page)
+int page_referenced(struct page * page, int * rsslimit)
{
+ struct mm_struct * mm;
struct pte_chain *pc;
int referenced = 0;
@@ -127,10 +129,17 @@
pte_t *pte = rmap_ptep_map(page->pte.direct);
if (ptep_test_and_clear_young(pte))
referenced++;
+
+ mm = ptep_to_mm(pte);
+ if (mm->rss > mm->rlimit_rss)
+ *rsslimit = 1;
rmap_ptep_unmap(pte);
} else {
int nr_chains = 0;
+ /* We clear it if any task using the page is under its limit. */
+ *rsslimit = 1;
+
/* Check all the page tables mapping this page. */
for (pc = page->pte.chain; pc; pc = pte_chain_next(pc)) {
int i;
@@ -142,6 +151,10 @@
p = rmap_ptep_map(pte_paddr);
if (ptep_test_and_clear_young(p))
referenced++;
+
+ mm = ptep_to_mm(p);
+ if (mm->rss < mm->rlimit_rss)
+ *rsslimit = 0;
rmap_ptep_unmap(p);
nr_chains++;
}
===== mm/vmscan.c 1.177 vs edited =====
--- 1.177/mm/vmscan.c Mon Jan 19 18:38:07 2004
+++ edited/mm/vmscan.c Fri Jan 23 14:00:48 2004
@@ -250,6 +250,7 @@
LIST_HEAD(ret_pages);
struct pagevec freed_pvec;
int pgactivate = 0;
+ int over_rsslimit;
int ret = 0;
cond_resched();
@@ -278,8 +279,8 @@
goto keep_locked;
pte_chain_lock(page);
- referenced = page_referenced(page);
- if (referenced && page_mapping_inuse(page)) {
+ referenced = page_referenced(page, &over_rsslimit);
+ if (referenced && page_mapping_inuse(page) && !over_rsslimit) {
/* In active use or really unfreeable. Activate it. */
pte_chain_unlock(page);
goto activate_locked;
@@ -597,6 +598,7 @@
long mapped_ratio;
long distress;
long swap_tendency;
+ int over_rsslimit;
lru_add_drain();
pgmoved = 0;
@@ -657,7 +659,7 @@
list_del(&page->lru);
if (page_mapped(page)) {
pte_chain_lock(page);
- if (page_mapped(page) && page_referenced(page)) {
+ if (page_mapped(page) && page_referenced(page, &over_rsslimit) && !over_rsslimit) {
pte_chain_unlock(page);
list_add(&page->lru, &l_active);
continue;
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2004-03-25 22:23 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-03-15 23:21 [PATCH] RSS limit enforcement for 2.6 Rik van Riel
2004-03-16 6:08 ` Nick Piggin
2004-03-18 22:04 ` Pavel Machek
2004-03-25 14:44 ` Rik van Riel
2004-03-25 22:23 ` Pavel Machek
-- strict thread matches above, loose matches on Subject: below --
2004-01-27 17:51 Rik van Riel
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox